Open-Meteo maintains a historical weather API that allows non-commercial use of the weather data it hosts.
This file builds on _v001, _v002, and _v003 to run exploratory analysis on some historical weather data.
The exploration process uses tidyverse, ranger, several generic custom functions, and several functions specific to Open Meteo processing. First, tidyverse, ranger, and the generic functions are loaded:
library(tidyverse) # tidyverse functionality is included throughout
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ranger) # predict() does not work on ranger objects unless ranger has been called
## Warning: package 'ranger' was built under R version 4.2.3
source("./Generic_Added_Utility_Functions_202105_v001.R") # Basic functions
Next, specific functions written in _v001 are copied:
# Helper function for reading a partial CSV file
partialCSVRead <- function(loc, firstRow=1L, lastRow=+Inf, col_names=TRUE, ...) {
  # Read a contiguous slice of a CSV file with readr::read_csv().
  #
  # FUNCTION arguments
  # loc: file location
  # firstRow: first row that is relevant to the partial file read (whether header line or data line)
  # lastRow: last row that is relevant to the partial file read (+Inf means read until last line of file)
  # col_names: the col_names parameter passed to readr::read_csv
  # TRUE means header=TRUE (get column names from file, read data starting on next line)
  # FALSE means header=FALSE (auto-generate column names, read data starting on first line)
  # character vector means use these as column names (read data starting on first line)
  # ...: additional arguments passed to read_csv
  #
  # skip: rows to be skipped are all those prior to firstRow
  # n_max: maximum rows read are lastRow-firstRow, with an additional data row when col_names
  # is not TRUE (scalar condition, so plain if/else rather than vectorized ifelse())
  nExtraRow <- if (isTRUE(col_names)) 0 else 1
  readr::read_csv(loc,
                  col_names=col_names,
                  skip=firstRow-1,
                  n_max=lastRow-firstRow+nExtraRow,
                  ...
  )
}
# Get the break points for gaps in a vector (e.g., 0, 3, 5:8, 20 has break points 0, 3, 5, 20 and 0, 3, 8, 20)
vecGaps <- function(x, addElements=c(), sortUnique=TRUE) {
  # Identify the start and end of each run of consecutive integers in x.
  #
  # FUNCTION arguments
  # x: vector of integer-valued positions
  # addElements: extra elements prepended to x before processing (e.g., 0 for a file start)
  # sortUnique: boolean, should x be sorted and de-duplicated first?
  #
  # Returns a list:
  # starts: first element of each consecutive run, with a trailing +Inf sentinel so
  #         starts[k+1]-1 can always be used as "the last row before the next gap"
  # ends: last element of each consecutive run
  #
  # Implemented with base diff() rather than unqualified lag()/lead(), so the result
  # does not depend on dplyr being attached (stats::lag would silently misbehave here)
  if(length(addElements)>0) x <- c(addElements, x)
  if(isTRUE(sortUnique)) x <- unique(sort(x))
  gapAfter <- diff(x) > 1 # TRUE where a run ends between x[i] and x[i+1]
  list("starts"=c(x[c(TRUE, gapAfter)], +Inf),
       "ends"=x[c(gapAfter, TRUE)]
  )
}
# Find the break points in a single file
flatFileGaps <- function(loc) {
  # Locate the blank (zero-length) lines of the file at loc and convert them to gap
  # break points via vecGaps(); adding 0 makes the start of the file behave like a
  # preceding blank line
  blankLines <- which(stringr::str_length(readLines(loc)) == 0)
  vecGaps(blankLines, addElements=0)
}
# Read all relevant data as CSV with header
readMultiCSV <- function(loc, col_names=TRUE, ...) {
  # Split the file at blank-line gaps and read each non-blank section as its own CSV.
  # Returns a list with one element per section, in file order.
  gaps <- flatFileGaps(loc)
  readSection <- function(i) {
    # Section i runs from the line after the i-th gap end to the line before the next gap start
    partialCSVRead(loc,
                   firstRow=gaps$ends[i]+1,
                   lastRow=gaps$starts[i+1]-1,
                   col_names=col_names,
                   ...
    )
  }
  lapply(seq_along(gaps$ends), FUN=readSection)
}
# Create URL with specified parameters for downloading data from Open Meteo
openMeteoURLCreate <- function(mainURL="https://archive-api.open-meteo.com/v1/archive",
                               lat=45,
                               lon=-90,
                               startDate=paste(as.integer(format(Sys.Date(), "%Y"))-1, "01", "01", sep="-"),
                               endDate=paste(as.integer(format(Sys.Date(), "%Y"))-1, "12", "31", sep="-"),
                               hourlyMetrics=NULL,
                               dailyMetrics=NULL,
                               tz="GMT",
                               ...
) {
  # FUNCTION arguments
  # mainURL: base URL of the Open-Meteo archive API
  # lat, lon: coordinates of the requested location
  # startDate, endDate: request date range; defaults are the full prior calendar year
  #   (computed with base format() so the defaults no longer require lubridate::year)
  # hourlyMetrics, dailyMetrics: comma-separated metric strings (NULL means omit the parameter)
  # tz: timezone string, e.g. "America/Chicago"
  # ...: additional strings concatenated verbatim onto the end of the URL
  # Create formatted string
  fString <- paste0(mainURL,
                    "?latitude=", lat,
                    "&longitude=", lon,
                    "&start_date=", startDate,
                    "&end_date=", endDate
  )
  if(!is.null(hourlyMetrics)) fString <- paste0(fString, "&hourly=", hourlyMetrics)
  if(!is.null(dailyMetrics)) fString <- paste0(fString, "&daily=", dailyMetrics)
  # Percent-encode every "/" in the timezone and return the formatted string.
  # gsub replaces all occurrences; the previous str_replace() only replaced the first,
  # which broke multi-slash timezones such as "America/Argentina/Buenos_Aires"
  paste0(fString, "&timezone=", gsub("/", "%2F", tz, fixed=TRUE), ...)
}
# Helper function to simplify entry of parameters for Open Meteo download requests
helperOpenMeteoURL <- function(cityName=NULL,
                               lat=NULL,
                               lon=NULL,
                               hourlyMetrics=NULL,
                               hourlyIndices=NULL,
                               hourlyDesc=tblMetricsHourly,
                               dailyMetrics=NULL,
                               dailyIndices=NULL,
                               dailyDesc=tblMetricsDaily,
                               startDate=NULL,
                               endDate=NULL,
                               tz=NULL,
                               ...
) {
  # FUNCTION arguments
  # cityName: city looked up in maps::us.cities when lat/lon are not given
  # lat, lon: coordinates (NULL means derive from cityName)
  # hourlyMetrics/dailyMetrics: comma-separated metric strings (NULL means build from indices)
  # hourlyIndices/dailyIndices: row indices into hourlyDesc/dailyDesc for building metric strings
  # hourlyDesc/dailyDesc: metric description tibbles with a "metric" column
  #   NOTE(review): defaults reference the globals tblMetricsHourly/tblMetricsDaily, which must
  #   exist at call time when the defaults are used
  # startDate, endDate, tz: NULL means use the defaults of openMeteoURLCreate()
  # ...: passed through to openMeteoURLCreate()
  # Convert city to lat/lon if lat/lon are NULL
  # (scalar short-circuit || rather than elementwise |)
  if(is.null(lat) || is.null(lon)) {
    if(is.null(cityName)) stop("\nMust provide lat/lon or city name available in maps::us.cities\n")
    cityData <- maps::us.cities %>% tibble::as_tibble() %>% filter(name==cityName)
    if(nrow(cityData)!=1) stop("\nMust provide city name that maps uniquely to maps::us.cities$name\n")
    lat <- cityData$lat[1]
    lon <- cityData$long[1]
  }
  # Get hourly metrics by index if relevant (scalar && rather than elementwise &)
  if(is.null(hourlyMetrics) && !is.null(hourlyIndices)) {
    hourlyMetrics <- hourlyDesc %>% slice(hourlyIndices) %>% pull(metric)
    hourlyMetrics <- paste0(hourlyMetrics, collapse=",")
    cat("\nHourly metrics created from indices:", hourlyMetrics, "\n\n")
  }
  # Get daily metrics by index if relevant
  if(is.null(dailyMetrics) && !is.null(dailyIndices)) {
    dailyMetrics <- dailyDesc %>% slice(dailyIndices) %>% pull(metric)
    dailyMetrics <- paste0(dailyMetrics, collapse=",")
    cat("\nDaily metrics created from indices:", dailyMetrics, "\n\n")
  }
  # Use default values from openMeteoURLCreate() for startDate, endDate, and tz if passed as NULL
  if(is.null(startDate)) startDate <- eval(formals(openMeteoURLCreate)$startDate)
  if(is.null(endDate)) endDate <- eval(formals(openMeteoURLCreate)$endDate)
  if(is.null(tz)) tz <- eval(formals(openMeteoURLCreate)$tz)
  # Create and return URL
  openMeteoURLCreate(lat=lat,
                     lon=lon,
                     startDate=startDate,
                     endDate=endDate,
                     hourlyMetrics=hourlyMetrics,
                     dailyMetrics=dailyMetrics,
                     tz=tz,
                     ...
  )
}
# Read JSON data returned from Open Meteo
readOpenMeteoJSON <- function(js, mapDaily=tblMetricsDaily, mapHourly=tblMetricsHourly) {
  # FUNCTION arguments:
  # js: JSON list returned by download from Open-Meteo
  # mapDaily: mapping file for daily metrics
  #   NOTE(review): default references the global tblMetricsDaily, which must exist at call time
  # mapHourly: mapping file for hourly metrics (same note for the global tblMetricsHourly)
  #
  # Returns a list: tblDaily, tblHourly, tblUnits, tblDescription
  # (elements are NULL where the corresponding object is absent from the JSON)
  # Get the object and names
  jsObj <- jsonlite::read_json(js, simplifyVector = TRUE)
  nms <- jsObj %>% names()
  cat("\nObjects in JSON include:", paste(nms, collapse=", "), "\n\n")
  # Set default objects as NULL
  tblDaily <- NULL
  tblHourly <- NULL
  tblUnitsDaily <- NULL
  tblUnitsHourly <- NULL
  # Get daily and hourly as tibble if relevant
  # (omProcessDaily/omProcessHourly are defined elsewhere in this file and add date/hour columns)
  if("daily" %in% nms) tblDaily <- jsObj$daily %>% tibble::as_tibble() %>% omProcessDaily()
  if("hourly" %in% nms) tblHourly <- jsObj$hourly %>% tibble::as_tibble() %>% omProcessHourly()
  # Helper function for unit conversions
  helperMetricUnit <- function(x, mapper, desc=NULL) {
    # When desc is not supplied, recover it from the caller's argument expression:
    # deparse() of the unevaluated x argument (e.g. "jsObj$daily_units") with everything
    # up to the final "$" stripped, leaving e.g. "daily_units" as the metricType label
    if(is.null(desc))
      desc <- as.list(match.call())$x %>%
        deparse() %>%
        stringr::str_replace_all(pattern=".*\\$", replacement="")
    # One row per metric: the unit value joined to its description, with the degree
    # sign (U+00B0) replaced by an ASCII-friendly "deg " prefix
    x %>%
      tibble::as_tibble() %>%
      pivot_longer(cols=everything()) %>%
      left_join(mapper, by=c("name"="metric")) %>%
      mutate(value=stringr::str_replace(value, "\u00b0", "deg ")) %>%
      mutate(metricType=desc) %>%
      select(metricType, everything())
  }
  # Get the unit descriptions, then combine whichever unit tables exist
  # (hourly rows come first when both are present)
  if("daily_units" %in% nms) tblUnitsDaily <- helperMetricUnit(jsObj$daily_units, mapDaily)
  if("hourly_units" %in% nms) tblUnitsHourly <- helperMetricUnit(jsObj$hourly_units, mapHourly)
  if(is.null(tblUnitsDaily) & !is.null(tblUnitsHourly)) tblUnits <- tblUnitsHourly
  else if(!is.null(tblUnitsDaily) & is.null(tblUnitsHourly)) tblUnits <- tblUnitsDaily
  else if(!is.null(tblUnitsDaily) & !is.null(tblUnitsHourly))
    tblUnits <- bind_rows(tblUnitsHourly, tblUnitsDaily)
  else tblUnits <- NULL
  # Put everything else together (all remaining top-level JSON fields as a one-row tibble)
  tblDescription <- jsObj[setdiff(nms, c("hourly", "hourly_units", "daily", "daily_units"))] %>%
    tibble::as_tibble()
  # Return the list objects
  list(tblDaily=tblDaily, tblHourly=tblHourly, tblUnits=tblUnits, tblDescription=tblDescription)
}
# Return Open meteo metadata in prettified format
prettyOpenMeteoMeta <- function(df, extr="tblDescription") {
  # Print each column of a metadata frame as a "name: value" line.
  #
  # FUNCTION arguments
  # df: metadata tibble/data frame, or a list (e.g., from readOpenMeteoJSON())
  #     from which element extr is extracted
  # extr: list element to extract when df is a list
  # Extract from list input (inherits() rather than comparing against class())
  if(inherits(df, "list")) df <- df[[extr]]
  for(name in names(df)) {
    # df[[name]] extracts by the loop variable's value; the previous pull(name)
    # would have resolved to a column literally called "name" if one existed
    cat("\n", name, ": ", df[[name]], sep="")
  }
  cat("\n\n")
}
# Process Open Meteo daily data
omProcessDaily <- function(tbl, extr="tblDaily") {
  # Add a parsed Date column to Open-Meteo daily data and move it to the front.
  #
  # FUNCTION arguments
  # tbl: daily tibble, or a list (e.g., from readOpenMeteoJSON()) from which
  #      element extr is extracted
  # extr: list element to extract when tbl is a list
  # inherits() is the idiomatic class test (replaces "list" %in% class(tbl))
  if(inherits(tbl, "list")) tbl <- tbl[[extr]]
  # Daily time stamps parse as dates via lubridate::ymd
  tbl %>% mutate(date=lubridate::ymd(time)) %>% select(date, everything())
}
# Process Open meteo hourly data
omProcessHourly <- function(tbl, extr="tblHourly") {
  # Parse the hourly time stamp and derive date and hour columns, keeping the
  # original string as origTime; time/date/hour are moved to the front.
  #
  # FUNCTION arguments
  # tbl: hourly tibble, or a list (e.g., from readOpenMeteoJSON()) from which
  #      element extr is extracted
  # extr: list element to extract when tbl is a list
  # inherits() is the idiomatic class test (replaces "list" %in% class(tbl))
  if(inherits(tbl, "list")) tbl <- tbl[[extr]]
  tbl %>%
    mutate(origTime=time,
           # Hourly time stamps parse as date-times via lubridate::ymd_hm
           time=lubridate::ymd_hm(time),
           date=lubridate::date(time),
           hour=lubridate::hour(time)
    ) %>%
    select(time, date, hour, everything())
}
# Simple predictive model for categorical variable
simpleOneVarPredict <- function(df,
                                tgt,
                                prd,
                                dfTest=NULL,
                                nPrint=30,
                                showPlot=TRUE,
                                returnData=TRUE
) {
  # FUNCTION ARGUMENTS:
  # df: data frame or tibble with key elements (training data set)
  # tgt: target variable
  # prd: predictor variable
  # dfTest: test dataset for applying predictions (NULL means no test scoring)
  # nPrint: maximum number of lines of confusion matrix to print
  # 0 means do not print any summary statistics
  # showPlot: boolean, should overlap plot be created and shown?
  # returnData: boolean, should the computed tables be returned as a list?
  # Counts of predictor to target variable; within each predictor bucket, the most
  # frequent target (first row after sorting by descending n) becomes the prediction
  dfPred <- df %>%
    group_by(across(all_of(c(prd, tgt)))) %>%
    summarize(n=n(), .groups="drop") %>%
    arrange(across(all_of(prd)), desc(n)) %>%
    group_by(across(all_of(prd))) %>%
    mutate(correct=row_number()==1, predicted=first(get(tgt))) %>%
    ungroup()
  # Confusion matrix and accuracy by target level
  dfConf <- dfPred %>%
    group_by(across(all_of(c(tgt, "correct")))) %>%
    summarize(n=sum(n), .groups="drop") %>%
    pivot_wider(id_cols=tgt, names_from=correct, values_from=n, values_fill=0) %>%
    mutate(n=`TRUE`+`FALSE`,
           pctCorrect=`TRUE`/n,
           pctNaive=1/(nrow(.)), # naive benchmark: uniform guess over target levels
           lift=pctCorrect/pctNaive-1
    )
  # Overall confusion matrix (naive benchmark: always predict the modal target)
  dfConfAll <- dfConf %>%
    summarize(nMax=max(n), across(c(`FALSE`, `TRUE`, "n"), sum)) %>%
    mutate(pctCorrect=`TRUE`/n,
           pctNaive=nMax/n,
           lift=pctCorrect/pctNaive-1,
           nBucket=length(unique(dfPred[[prd]]))
    )
  # Print confusion matrices
  if(nPrint > 0) {
    cat("\nAccuracy by target subgroup (training data):\n")
    dfConf %>% print(n=nPrint)
    cat("\nOverall Accuracy (training data):\n")
    dfConfAll %>% print(n=nPrint)
  }
  # Plot of overlaps
  if(isTRUE(showPlot)) {
    p1 <- dfPred %>%
      group_by(across(c(all_of(tgt), "predicted", "correct"))) %>%
      summarize(n=sum(n), .groups="drop") %>%
      ggplot(aes(x=get(tgt), y=predicted)) +
      labs(x="Actual",
           y="Predicted",
           title=paste0("Training data - Actual vs. predicted ", tgt),
           subtitle=paste0("(using ", prd, ")")
      ) +
      geom_text(aes(label=n)) +
      geom_tile(aes(fill=correct), alpha=0.25)
    print(p1)
  }
  # Create metrics for test dataset if requested
  if(!is.null(dfTest)) {
    # Most frequently predicted category from training data (fallback for predictor
    # levels unseen in training); sort=TRUE is required so that slice(1) is the modal
    # category -- without it, count() returns groups in level order, not by frequency
    mostPredicted <- count(dfPred, predicted, wt=n, sort=TRUE) %>% slice(1) %>% pull(predicted)
    # Get mapping of metric to prediction
    dfPredict <- dfPred %>%
      group_by(across(all_of(c(prd, "predicted")))) %>%
      summarize(n=sum(n), .groups="drop")
    # Create predictions for test data
    dfPredTest <- dfTest %>%
      select(all_of(c(prd, tgt))) %>%
      left_join(select(dfPredict, -n)) %>%
      replace_na(list(predicted=mostPredicted)) %>%
      group_by(across(all_of(c(prd, tgt, "predicted")))) %>%
      summarize(n=n(), .groups="drop") %>%
      mutate(correct=(get(tgt)==predicted))
    # Create confusion statistics for test data
    dfConfTest <- dfPredTest %>%
      group_by(across(all_of(c(tgt, "correct")))) %>%
      summarize(n=sum(n), .groups="drop") %>%
      pivot_wider(id_cols=tgt, names_from=correct, values_from=n, values_fill=0) %>%
      mutate(n=`TRUE`+`FALSE`,
             pctCorrect=`TRUE`/n,
             pctNaive=1/(nrow(.)),
             lift=pctCorrect/pctNaive-1
      )
    # Overall confusion matrix for test data
    dfConfAllTest <- dfConfTest %>%
      summarize(nMax=max(n), across(c(`FALSE`, `TRUE`, "n"), sum)) %>%
      mutate(pctCorrect=`TRUE`/n,
             pctNaive=nMax/n,
             lift=pctCorrect/pctNaive-1,
             # Bucket count comes from the scored test data (dfPredTest); the previous
             # dfConfTest[[prd]] was always NULL because dfConfTest has no prd column,
             # which made nBucket 0 (compare the training branch, which uses dfPred)
             nBucket=length(unique(dfPredTest[[prd]]))
      )
    # Print confusion matrices
    if(nPrint > 0) {
      cat("\nAccuracy by target subgroup (testing data):\n")
      dfConfTest %>% print(n=nPrint)
      cat("\nOverall Accuracy (testing data):\n")
      dfConfAllTest %>% print(n=nPrint)
    }
  } else {
    dfPredTest <- NULL
    dfConfTest <- NULL
    dfConfAllTest <- NULL
  }
  # Return data if requested
  if(isTRUE(returnData)) list(dfPred=dfPred,
                              dfConf=dfConf,
                              dfConfAll=dfConfAll,
                              dfPredTest=dfPredTest,
                              dfConfTest=dfConfTest,
                              dfConfAllTest=dfConfAllTest
  )
}
# Fit a single predictor to a single categorical variable
simpleOneVarFit <- function(df,
                            tgt,
                            prd,
                            rankType="last",
                            naMethod=TRUE
) {
  # FUNCTION ARGUMENTS:
  # df: data frame or tibble with key elements (training data set)
  # tgt: target variable
  # prd: predictor variable
  # rankType: method for breaking ties of same n, passed to base::rank as ties.method=
  # naMethod: method for handling NA in ranks, passed to base::rank as na.last=
  #
  # Returns one row per (prd, tgt) combination with count n and rankN, where
  # rankN==1 marks the most frequent target within each predictor bucket
  # Counts of predictor to target variable, and associated predictions
  df %>%
    group_by(across(all_of(c(prd, tgt)))) %>%
    summarize(n=n(), .groups="drop") %>%
    arrange(across(all_of(prd)), desc(n), across(all_of(tgt))) %>%
    group_by(across(all_of(prd))) %>%
    # rank() is ascending, so n()+1-rank(n) inverts it: the largest n gets rankN 1
    mutate(rankN=n()+1-rank(n, ties.method=rankType, na.last=naMethod)) %>%
    arrange(across(all_of(prd)), rankN) %>%
    ungroup()
}
# Create categorical predictions mapper
simpleOneVarMapper <- function(df, tgt, prd) {
  # FUNCTION ARGUMENTS:
  # df: data frame or tibble from simpleOneVarFit()
  # tgt: target variable
  # prd: predictor variable
  # Most common actual results across all buckets (used downstream as the fallback
  # for predictor levels with no mapping)
  dfCommon <- df %>% count(across(all_of(tgt)), wt=n, sort=TRUE)
  # Top-ranked target within each predictor bucket becomes that bucket's prediction
  dfPredictor <- df %>%
    group_by(across(all_of(prd))) %>%
    slice(1) %>%
    select(all_of(c(prd, tgt))) %>%
    ungroup()
  list(dfPredictor=dfPredictor, dfCommon=dfCommon)
}
# Map the categorical predictions to unseen data
simpleOneVarApplyMapper <- function(df,
                                    tgt,
                                    prd,
                                    mapper,
                                    mapperDF="dfPredictor",
                                    mapperDefault="dfCommon",
                                    prdName="predicted"
) {
  # FUNCTION ARGUMENTS:
  # df: data frame containing prd for predicting tgt
  # tgt: target variable in df
  # prd: predictor variable in df
  # mapper: mapping list from simpleOneVarMapper()
  # mapperDF: element that can be used to merge mappings
  # mapperDefault: element that can be used for NA resulting from merging mapperDF
  # prdName: name for the prediction variable
  # Extract the mapper and default value
  vecRename <- c(prdName) %>% purrr::set_names(tgt)
  dfMap <- mapper[[mapperDF]] %>% select(all_of(c(prd, tgt))) %>% colRenamer(vecRename=vecRename)
  chrDefault <- mapper[[mapperDefault]] %>% slice(1) %>% pull(tgt)
  # Merge mappings to df; the NA fill targets the prdName column dynamically
  # (previously hard-coded as "predicted", which broke any non-default prdName)
  df %>%
    left_join(dfMap, by=prd) %>%
    replace_na(purrr::set_names(list(chrDefault), prdName))
}
# Create confusion matrix data for categorical predictions
simpleOneVarConfusionData <- function(df,
                                      tgtOrig,
                                      tgtPred,
                                      otherVars=c(),
                                      weightBy="n"
) {
  # FUNCTION ARGUMENTS:
  # df: data frame from simpleOneVarApplyMapper()
  # tgtOrig: original target variable name in df
  # tgtPred: predicted target variable name in df
  # otherVars: other variables to be kept (become grouping variables)
  # weightBy: weighting variable for counts in df (NULL means count each row of df as 1)
  # Weighted (or plain) counts per actual/predicted combination, flagged as correct
  # when actual and predicted agree; .data[[ ]] fetches columns by the string names
  grpVars <- c(tgtOrig, tgtPred, otherVars)
  df %>%
    group_by(across(all_of(grpVars))) %>%
    summarize(n=if(!is.null(weightBy)) sum(.data[[weightBy]]) else n(), .groups="drop") %>%
    mutate(correct=(.data[[tgtOrig]]==.data[[tgtPred]]))
}
# Print and plot confusion matrix for categorical predictions
simpleOneVarConfusionReport <- function(df,
                                        tgtOrig,
                                        tgtPred,
                                        otherVars=c(),
                                        printConf=TRUE,
                                        printConfOrig=printConf,
                                        printConfPred=printConf,
                                        printConfOverall=printConf,
                                        plotConf=TRUE,
                                        plotDesc="",
                                        nBucket=NA,
                                        predictorVarName="",
                                        returnData=FALSE
) {
  # FUNCTION ARGUMENTS:
  # df: data frame from simpleOneVarConfusionData()
  # tgtOrig: original target variable name in df
  # tgtPred: predicted target variable name in df
  # otherVars: other variables to be kept (will be grouping variables) - NOT IMPLEMENTED
  # printConf: boolean, should confusion matrix data be printed? Applies to all three
  # printConfOrig: boolean, should confusion data be printed based on original target variable?
  # printConfPred: boolean, should confusion data be printed based on predicted target variable?
  # printConfOverall: boolean, should overall confusion data be printed?
  # plotConf: boolean, should confusion overlap data be plotted?
  # plotDesc: descriptive label to be included in front of plot title
  # nBucket: number of buckets used for prediction (pass from previous data)
  # predictorVarName: variable name to be included in chart description
  # returnData: boolean, should the confusion matrices be returned?
  #
  # Each dfConf* table below is only built when it will be printed or returned
  # Confusion data based on original target variable
  if(isTRUE(printConfOrig) | isTRUE(returnData)) {
    dfConfOrig <- df %>%
      group_by(across(all_of(c(tgtOrig)))) %>%
      summarize(right=sum(n*correct), wrong=sum(n)-right, n=sum(n), .groups="drop") %>%
      # pctNaive is the class share, so lift compares accuracy to always guessing this class
      mutate(pctRight=right/n, pctNaive=n/(sum(n)), lift=pctRight/pctNaive-1)
  }
  # Confusion data based on predicted target variable
  if(isTRUE(printConfPred) | isTRUE(returnData)) {
    dfConfPred <- df %>%
      group_by(across(all_of(c(tgtPred)))) %>%
      summarize(right=sum(n*correct), wrong=sum(n)-right, n=sum(n), .groups="drop") %>%
      mutate(pctRight=right/n)
  }
  # Overall confusion data
  if(isTRUE(printConfOverall) | isTRUE(returnData)) {
    # maxNaive: size of the most frequent actual class (the always-guess-modal benchmark)
    maxNaive <- df %>%
      group_by(across(all_of(tgtOrig))) %>%
      summarize(n=sum(n), .groups="drop") %>%
      arrange(desc(n)) %>%
      slice(1) %>%
      pull(n)
    dfConfOverall <- df %>%
      summarize(right=sum(n*correct), wrong=sum(n)-right, n=sum(n), .groups="drop") %>%
      mutate(maxN=maxNaive, pctRight=right/n, pctNaive=maxN/n, lift=pctRight/pctNaive-1, nBucket=nBucket)
  }
  # Confusion report based on original target variable
  if(isTRUE(printConfOrig)) {
    cat("\nConfusion data based on original target variable:", tgtOrig, "\n")
    dfConfOrig %>%
      print(n=50)
  }
  # Confusion report based on predicted target variable
  if(isTRUE(printConfPred)) {
    cat("\nConfusion data based on predicted target variable:", tgtPred, "\n")
    dfConfPred %>%
      print(n=50)
  }
  # Overall confusion matrix
  if(isTRUE(printConfOverall)) {
    cat("\nOverall confusion matrix\n")
    dfConfOverall %>%
      print(n=50)
  }
  # Plot of overlaps (actual vs. predicted tile plot with counts overlaid)
  if(isTRUE(plotConf)) {
    p1 <- df %>%
      group_by(across(all_of(c(tgtOrig, tgtPred, "correct")))) %>%
      summarize(n=sum(n), .groups="drop") %>%
      ggplot(aes(x=get(tgtOrig), y=get(tgtPred))) +
      labs(x="Actual",
           y="Predicted",
           title=paste0(plotDesc, "Actual vs. predicted ", tgtOrig),
           subtitle=paste0("(using ", predictorVarName, ")")
      ) +
      geom_text(aes(label=n)) +
      geom_tile(aes(fill=correct), alpha=0.25)
    print(p1)
  }
  # Return data if requested
  if(isTRUE(returnData)) list(dfConfOrig=dfConfOrig, dfConfPred=dfConfPred, dfConfOverall=dfConfOverall)
}
# Process for chaining predictor, applier, and confusion matrix for categorical variables
simpleOneVarChain <- function(df,
                              tgt,
                              prd,
                              mapper=NULL,
                              rankType="last",
                              naMethod=TRUE,
                              printReport=TRUE,
                              plotDesc="",
                              returnData=TRUE,
                              includeConfData=FALSE
) {
  # FUNCTION ARGUMENTS:
  # df: data frame or tibble with key elements (training or testing data set)
  # tgt: target variable
  # prd: predictor variable
  # mapper: mapping list to be applied for predictions (NULL means create via simpleOneVarMapper())
  # rankType: method for breaking ties of same n, passed to base::rank as ties.method=
  # naMethod: method for handling NA in ranks, passed to base::rank as na.last=
  # printReport: boolean, should the confusion report data and plot be printed?
  # plotDesc: descriptive label to be included in front of plot title
  # returnData: boolean, should data elements be returned?
  # includeConfData: boolean, should confusion data be returned?
  # Create the summary of predictor-target-n
  dfFit <- simpleOneVarFit(df, tgt=tgt, prd=prd, rankType=rankType, naMethod=naMethod)
  # Create the mapper if it does not already exist
  if(is.null(mapper)) mapper <- simpleOneVarMapper(dfFit, tgt=tgt, prd=prd)
  # Apply mapper to data
  dfApplied <- simpleOneVarApplyMapper(dfFit, tgt=tgt, prd=prd, mapper=mapper)
  # Create confusion data
  dfConfusion <- simpleOneVarConfusionData(dfApplied, tgtOrig=tgt, tgtPred="predicted")
  # Create confusion report when printing or returning it
  # (scalar short-circuit || rather than elementwise |)
  if(isTRUE(printReport) || isTRUE(includeConfData)) {
    dfConfReport <- simpleOneVarConfusionReport(df=dfConfusion,
                                                tgtOrig=tgt,
                                                tgtPred="predicted",
                                                nBucket=length(unique(dfApplied[[prd]])),
                                                predictorVarName=prd,
                                                printConf=printReport,
                                                plotConf=printReport,
                                                plotDesc=plotDesc,
                                                returnData=includeConfData
    )
  }
  # Return data if requested
  if(isTRUE(returnData)) {
    ret <- list(dfFit=dfFit, mapper=mapper, dfApplied=dfApplied, dfConfusion=dfConfusion)
    if(isTRUE(includeConfData)) ret <- c(ret, list(dfConfData=dfConfReport))
    ret
  }
}
# Adds a train-test component for single variable predictions
simpleOneVarTrainTest <- function(dfTrain,
                                  dfTest,
                                  tgt,
                                  prd,
                                  rankType="last",
                                  naMethod=TRUE,
                                  printReport=FALSE,
                                  includeConfData=TRUE,
                                  returnData=TRUE
) {
  # FUNCTION ARGUMENTS:
  # dfTrain: data frame or tibble with key elements (training data set)
  # dfTest: data frame or tibble with key elements (testing data set)
  # tgt: target variable
  # prd: predictor variable
  # rankType: method for breaking ties of same n, passed to base::rank as ties.method=
  # naMethod: method for handling NA in ranks, passed to base::rank as na.last=
  # printReport: boolean, should the confusion report data and plot be printed?
  # includeConfData: boolean, should confusion data be returned?
  # returnData: boolean, should data elements be returned?
  # Fit the training data, building a fresh mapper
  fitTrain <- simpleOneVarChain(df=dfTrain,
                                tgt=tgt,
                                prd=prd,
                                rankType=rankType,
                                naMethod=naMethod,
                                printReport=printReport,
                                plotDesc="Training data: ",
                                returnData=TRUE,
                                includeConfData=includeConfData
  )
  # Score the testing data using the mapper learned on the training data
  fitTest <- simpleOneVarChain(df=dfTest,
                               tgt=tgt,
                               prd=prd,
                               mapper=fitTrain$mapper,
                               rankType=rankType,
                               naMethod=naMethod,
                               printReport=printReport,
                               plotDesc="Testing data: ",
                               returnData=TRUE,
                               includeConfData=includeConfData
  )
  # Return data if requested (element names kept for downstream callers)
  if(isTRUE(returnData)) list(tmpTrain=fitTrain, tmpTest=fitTest)
}
# Plot the means by cluster and variable for a k-means object
plotClusterMeans <- function(km, nrow=NULL, ncol=NULL, scales="fixed") {
  # FUNCTION ARGUMENTS
  # km: object returned by stats::kmeans(...)
  # nrow: number of rows for faceting (NULL means default)
  # ncol: number of columns for faceting (NULL means default)
  # scales: passed to facet_wrap as scales=scales
  #
  # NOTE(review): the parameter name nrow shadows base::nrow, but the calls
  # nrow(km$centers) below still resolve to the base function because R skips
  # non-function bindings when resolving a name used in call position
  # Assess clustering by dimension
  p1 <- km$centers %>%
    tibble::as_tibble() %>%
    mutate(cluster=row_number()) %>%
    pivot_longer(cols=-c(cluster)) %>%
    # Order metrics by the spread of their cluster means (signed difference for
    # exactly two clusters, range otherwise) so the most discriminating metrics
    # sort to one end of the axis
    ggplot(aes(x=fct_reorder(name,
                             value,
                             .fun=function(a) ifelse(length(a)==2, a[2]-a[1], diff(range(a)))
               ),
               y=value
           )
    ) +
    geom_point(aes(color=factor(cluster))) +
    scale_color_discrete("Cluster") +
    facet_wrap(~factor(cluster), nrow=nrow, ncol=ncol, scales=scales) +
    labs(title=paste0("Cluster means (kmeans, centers=", nrow(km$centers), ")"),
         x="Metric",
         y="Cluster mean"
    ) +
    # Dashed reference line at the median over all center values
    geom_hline(yintercept=median(km$centers), lty=2) +
    coord_flip()
  print(p1)
}
# Plot percentage by cluster
plotClusterPct <- function(df, km, keyVars, nRowFacet=1, printPlot=TRUE) {
  # FUNCTION ARGUMENTS:
  # df: data frame initially passed to stats::kmeans(...)
  # km: object returned by stats::kmeans(...)
  # keyVars: character vector of length 1 (y-only, x will be cl) or length 2 (x, y, cl will facet)
  # nRowFacet: number of rows for facetting (only relevant if length(keyVars) is 2)
  # printPlot: boolean, should plot be printed? (if not true, plot will be returned)
  # Check length of keyVars
  if(!(length(keyVars) %in% c(1, 2))) stop("\nArgument keyVars must be length-1 or length-2\n")
  # Share of each cluster within every keyVars combination
  p1 <- df %>%
    mutate(cl=factor(km$cluster)) %>%
    group_by(across(c(all_of(keyVars), "cl"))) %>%
    summarize(n=n(), .groups="drop") %>%
    group_by(across(all_of(keyVars))) %>%
    mutate(pct=n/sum(n)) %>%
    ungroup() %>%
    ggplot() +
    scale_fill_continuous(low="white", high="green") +
    # Axis labels depend on a scalar condition, so plain if/else rather than
    # vectorized ifelse()
    labs(title=paste0("Percentage by cluster (kmeans with ", nrow(km$centers), " centers)"),
         x=if(length(keyVars)==1) "Cluster" else keyVars[1],
         y=if(length(keyVars)==1) keyVars[1] else keyVars[2]
    )
  if(length(keyVars)==1) p1 <- p1 + geom_tile(aes(fill=pct, x=cl, y=get(keyVars[1])))
  if(length(keyVars)==2) {
    p1 <- p1 +
      geom_tile(aes(fill=pct, x=get(keyVars[1]), y=get(keyVars[2]))) +
      facet_wrap(~cl, nrow=nRowFacet)
  }
  if(isTRUE(printPlot)) print(p1)
  else return(p1)
}
# Run k-means (or use passed k-means object) and plot centers and percentages of observations
runKMeans <- function(df,
                      km=NULL,
                      vars=NULL,
                      centers=2,
                      nStart=1L,
                      iter.max=10L,
                      seed=NULL,
                      plotMeans=FALSE,
                      nrowMeans=NULL,
                      plotPct=NULL,
                      nrowPct=1,
                      returnKM=is.null(km)
) {
  # FUNCTION ARGUMENTS:
  # df: data frame for clustering
  # km: k-means object (will shut off k-means processing and run as plot-only)
  # vars: variables to be used for clustering (NULL means everything in df)
  # centers: number of centers
  # nStart: passed to kmeans
  # iter.max: passed to kmeans
  # seed: seed to be set (if NULL, no seed is set)
  # plotMeans: boolean, plot variable means by cluster?
  # nrowMeans: argument passed as nrow for faceting rows in plotClusterMeans() - NULL is default ggplot2
  # plotPct: list of character vectors to be passed sequentially as keyVars to plotClusterPct()
  # NULL means do not run
  # plotPct=list(c("var1"), c("var2", "var3")) will run plotting twice
  # nrowPct: argument for faceting number of rows in plotClusterPct()
  # returnKM: boolean, should the k-means object be returned?
  # Set seed if requested
  if(!is.null(seed)) set.seed(seed)
  # Get the variable names if passed as NULL
  if(is.null(vars)) vars <- names(df)
  # Run the k-means process if the object has not been passed
  if(is.null(km)) {
    km <- df %>%
      select(all_of(vars)) %>%
      kmeans(centers=centers, iter.max=iter.max, nstart=nStart)
  }
  # Assess clustering by dimension if requested
  if(isTRUE(plotMeans)) plotClusterMeans(km, nrow=nrowMeans)
  # seq_along() is safe for empty lists (1:length(x) would yield c(1, 0))
  if(!is.null(plotPct))
    for(ctr in seq_along(plotPct))
      plotClusterPct(df=df, km=km, keyVars=plotPct[[ctr]], nRowFacet=nrowPct)
  # Return the k-means object
  if(isTRUE(returnKM)) return(km)
}
# Assign points to closest center of a passed k-means object
assignKMeans <- function(km, df, returnAllDistanceData=FALSE) {
  # FUNCTION ARGUMENTS:
  # km: a k-means object
  # df: data frame or tibble
  # returnAllDistanceData: boolean, should the distance data and clusters be returned?
  # TRUE returns a data frame with distances as V1, V2, ..., and cluster as cl
  # FALSE returns a vector of cluster assignments as integers
  # Select columns from df to match km (select errors if any are missing)
  df <- df %>% select(all_of(colnames(km$centers)))
  # all.equal() returns a character vector on mismatch, so wrap in isTRUE();
  # the previous !all.equal(...) would itself error instead of stopping cleanly
  if(!isTRUE(all.equal(names(df), colnames(km$centers)))) stop("\nName mismatch in clustering and frame\n")
  # Euclidean distance from every observation to each center; filling an explicit
  # matrix keeps the orientation correct even when df has a single row (the previous
  # sapply() simplified that case to a vector, transposing the result)
  obsMat <- as.matrix(df)
  distMat <- matrix(0, nrow=nrow(obsMat), ncol=nrow(km$centers))
  for(k in seq_len(nrow(km$centers))) {
    distMat[, k] <- sqrt(rowSums(sweep(obsMat, 2, km$centers[k, ])**2))
  }
  # Package distances as a tibble and find the closest center per observation
  distClust <- distMat %>%
    as.data.frame() %>%
    tibble::as_tibble() %>%
    mutate(cl=apply(distMat, 1, which.min))
  # Return the proper file
  if(isTRUE(returnAllDistanceData)) return(distClust)
  else return(distClust$cl)
}
As well, specific functions from _v002 and _v003 are copied:
runSimpleRF <- function(df, yVar, xVars=NULL, ...) {
  # FUNCTION ARGUMENTS:
  # df: data frame containing observations
  # yVar: variable to be predicted (numeric for regression, categorical for classification)
  # xVars: predictor variables (NULL means everything in df except for yVar)
  # ...: other arguments passed to ranger::ranger
  # Default predictors: every column other than the target
  if(is.null(xVars)) xVars <- setdiff(names(df), yVar)
  # Build the model formula "yVar ~ x1+x2+..." and fit on just the needed columns
  fmlText <- paste0(yVar, "~", paste0(xVars, collapse="+"))
  ranger::ranger(as.formula(fmlText),
                 data=df[, c(yVar, xVars)],
                 ...
  )
}
plotRFImportance <- function(rf,
                             impName="variable.importance",
                             divBy=1000,
                             plotTitle=NULL,
                             plotData=TRUE,
                             returnData=!isTRUE(plotData)
) {
  # FUNCTION ARGUMENTS:
  # rf: output list from random forest with an element for importance
  # impName: name of the element to extract from rf
  # divBy: divisor for the importance variable
  # plotTitle: title for plot (NULL means use default)
  # plotData: boolean, should the importance plot be created and printed?
  # returnData: boolean, should the processed data be returned?
  # Create title if not provided
  if(is.null(plotTitle)) plotTitle <- "Importance for simple random forest"
  # Create y-axis label, noting the divisor whenever it is not 1
  # (<- rather than = for assignment, per file convention)
  yAxisLabel <- "Variable Importance"
  if(!isTRUE(all.equal(divBy, 1))) yAxisLabel <- paste0(yAxisLabel, " (", divBy, "s)")
  # Convert the named importance vector into a tibble with metric/imp columns
  df <- rf[[impName]] %>%
    as.data.frame() %>%
    purrr::set_names("imp") %>%
    rownames_to_column("metric") %>%
    tibble::as_tibble()
  # Create and print plot if requested (horizontal bars ordered by importance)
  if(isTRUE(plotData)) {
    p1 <- df %>%
      ggplot(aes(x=fct_reorder(metric, imp), y=imp/divBy)) +
      geom_col(fill="lightblue") +
      labs(x=NULL, y=yAxisLabel, title=plotTitle) +
      coord_flip()
    print(p1)
  }
  # Return data if requested
  if(isTRUE(returnData)) return(df)
}
predictRF <- function(rf, df, newCol="pred", predsOnly=FALSE) {
  # FUNCTION ARGUMENTS:
  # rf: a trained random forest model
  # df: data frame for adding predictions
  # newCol: name for new column to be added to df
  # predsOnly: boolean, should only the vector of predictions be returned?
  # if FALSE, a column named newCol is added to df, with df returned
  # Score df with the trained model (predict() output carries a $predictions element)
  modelOutput <- predict(rf, data=df)
  preds <- modelOutput$predictions
  # Return just the predictions if requested
  if(isTRUE(predsOnly)) {
    return(preds)
  }
  # Otherwise attach predictions as a new column and return the frame
  df[newCol] <- preds
  df
}
# Update for continuous variables
reportAccuracy <- function(df,
                           trueCol,
                           predCol="pred",
                           reportAcc=TRUE,
                           rndReport=2,
                           useLabel="requested data",
                           returnAcc=!isTRUE(reportAcc),
                           reportR2=FALSE
) {
  # FUNCTION ARGUMENTS:
  # df: data frame containing actual and predictions
  # trueCol: column containing true value
  # predCol: column containing predicted value
  # reportAcc: boolean, should accuracy be reported (printed to output)?
  # rndReport: number of significant digits for reporting (will be converted to percentage first)
  # useLabel: label for data to be used in reporting
  # returnAcc: boolean, should the accuracy be returned
  # return value is not converted to percentage, not rounded
  # reportR2: boolean, should accuracy be calculated as R-squared?
  # (default FALSE measures as categorical)
  # Continuous or categorical reporting
  if(isTRUE(reportR2)) {
    # Continuous: R-squared relative to the null (grand-mean) model.
    # Base [[ extraction replaces pull(get(...)), which required dplyr to be attached
    tc <- df[[trueCol]]
    pc <- df[[predCol]]
    mseNull <- mean((tc-mean(tc))**2)
    msePred <- mean((tc-pc)**2)
    r2 <- 1 - msePred/mseNull
    if(isTRUE(reportAcc))
      cat("\nR-squared of ",
          useLabel,
          " is: ",
          round(100*r2, rndReport),
          "% (RMSE ",
          round(sqrt(msePred), 2),
          " vs. ",
          round(sqrt(mseNull), 2),
          " null)\n",
          sep=""
      )
    acc <- c("mseNull"=mseNull, "msePred"=msePred, "r2"=r2)
  } else {
    # Categorical: share of exact matches ([[ gives plain vectors rather than
    # comparing one-column data frames)
    acc <- mean(df[[trueCol]]==df[[predCol]])
    if(isTRUE(reportAcc))
      cat("\nAccuracy of ", useLabel, " is: ", round(100*acc, rndReport), "%\n", sep="")
  }
  # Return accuracy statistic if requested
  if(isTRUE(returnAcc)) return(acc)
}
# Update for automated rounding
plotConfusion <- function(df,
                          trueCol,
                          predCol="pred",
                          useTitle=NULL,
                          useSub=NULL,
                          plotCont=FALSE,
                          rndTo=NULL,
                          rndBucketsAuto=100,
                          nSig=NULL,
                          refXY=FALSE
) {
  # FUNCTION ARGUMENTS:
  # df: data frame containing actual and predictions
  # trueCol: column containing true value
  # predCol: column containing predicted value
  # useTitle: title to be used for chart (NULL means create from trueCol)
  # useSub: subtitle to be used for chart (NULL means none)
  # plotCont: boolean, should plotting assume continuous variables?
  # (default FALSE assumes confusion plot for categorical variables)
  # rndTo: every number in x should be rounded to the nearest rndTo
  # NULL means no rounding (default)
  # -1L means make an estimate based on data
  # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
  # nSig: number of significant digits for automatically calculated rounding parameter
  # (NULL means calculate exactly)
  # refXY: boolean, should a reference line for y=x be included? (relevant only for continuous)
  # Create title if not supplied
  if(is.null(useTitle)) useTitle <- paste0("Predicting ", trueCol)
  # Function auto-round returns vector as-is when rndTo is NULL and auto-rounds when rndTo is -1L
  # (autoRound is defined elsewhere in this file)
  df <- df %>%
    mutate(across(all_of(c(trueCol, predCol)),
                  .fns=function(x) autoRound(x, rndTo=rndTo, rndBucketsAuto=rndBucketsAuto, nSig=nSig)
    )
    )
  # Create base plot (applicable to categorical or continuous variables)
  # Use x as true and y as predicted, for more meaningful geom_smooth() if continuous
  # Flip coordinates if categorical
  p1 <- df %>%
    group_by(across(all_of(c(trueCol, predCol)))) %>%
    summarize(n=n(), .groups="drop") %>%
    ggplot(aes(y=get(predCol), x=get(trueCol))) +
    labs(y="Predicted", x="Actual", title=useTitle, subtitle=useSub)
  # Update plot as appropriate
  if(isTRUE(plotCont)) {
    # Continuous: bubble scatter sized by count, with a count-weighted linear smoother
    p1 <- p1 +
      geom_point(aes(size=n), alpha=0.5) +
      scale_size_continuous("# Obs") +
      geom_smooth(aes(weight=n), method="lm")
    if(isTRUE(refXY)) p1 <- p1 + geom_abline(slope=1, intercept=0, lty=2, color="red")
  } else {
    # Categorical: tile heat map with counts overlaid
    p1 <- p1 +
      geom_tile(aes(fill=n)) +
      geom_text(aes(label=n), size=2.5) +
      coord_flip() +
      scale_fill_continuous("", low="white", high="green")
  }
  # Output plot
  print(p1)
}
runFullRF <- function(dfTrain,
                      yVar,
                      xVars,
                      dfTest=dfTrain,
                      useLabel="test data",
                      useSub=NULL,
                      isContVar=FALSE,
                      rndTo=NULL,
                      rndBucketsAuto=100,
                      nSig=NULL,
                      refXY=FALSE,
                      makePlots=TRUE,
                      plotImp=makePlots,
                      plotConf=makePlots,
                      returnData=FALSE,
                      ...
) {
  # End-to-end random forest driver: fit, importance, prediction, accuracy, confusion plot.
  #
  # FUNCTION ARGUMENTS:
  # dfTrain: training data
  # yVar: dependent variable
  # xVars: column(s) containing independent variables
  # dfTest: test dataset for applying predictions (default reuses the training data)
  # useLabel: label to be used for reporting accuracy
  # useSub: subtitle to be used for confusion chart (NULL means none)
  # isContVar: boolean, is the variable continuous? (default FALSE means categorical)
  # rndTo: every number in x should be rounded to the nearest rndTo
  # NULL means no rounding (default)
  # -1L means make an estimate based on data
  # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
  # nSig: number of significant digits for automatically calculated rounding parameter
  # (NULL means calculate exactly)
  # refXY: boolean, should a reference line for y=x be included? (relevant only for continuous)
  # makePlots: boolean, should plots be created for variable importance and confusion matrix?
  # plotImp: boolean, should variable importance be plotted? (default is makePlots)
  # plotConf: boolean, should confusion matrix be plotted? (default is makePlots)
  # returnData: boolean, should data be returned?
  # ...: additional parameters to pass to runSimpleRF(), which are then passed to ranger::ranger()
  # 1. Run random forest using impurity for importance
  rf <- runSimpleRF(df=dfTrain, yVar=yVar, xVars=xVars, importance="impurity", ...)
  # 2. Create, and optionally plot, variable importance (always returned for step 6)
  rfImp <- plotRFImportance(rf, plotData=plotImp, returnData=TRUE)
  # 3. Predict on test dataset
  tstPred <- predictRF(rf=rf, df=dfTest)
  # 4. Report on accuracy (updated for continuous or categorical)
  rfAcc <- reportAccuracy(tstPred,
                          trueCol=yVar,
                          rndReport=3,
                          useLabel=useLabel,
                          reportR2=isTRUE(isContVar),
                          returnAcc=TRUE
  )
  # 5. Plot confusion data (updated for continuous vs. categorical) if requested
  if(isTRUE(plotConf)) {
    plotConfusion(tstPred,
                  trueCol=yVar,
                  useSub=useSub,
                  plotCont=isTRUE(isContVar),
                  rndTo=rndTo,
                  rndBucketsAuto=rndBucketsAuto,
                  nSig=nSig,
                  refXY=refXY
    )
  }
  #6. Return data if requested
  if(isTRUE(returnData)) return(list(rf=rf, rfImp=rfImp, tstPred=tstPred, rfAcc=rfAcc))
}
runPartialImportanceRF <- function(dfTrain,
                                   yVar,
                                   dfTest,
                                   impDB=dfImp,
                                   nImp=+Inf,
                                   otherX=c(),
                                   isContVar=TRUE,
                                   useLabel=keyLabel,
                                   useSub=stringr::str_to_sentence(keyLabel),
                                   rndTo=NULL,
                                   rndBucketsAuto=50,
                                   nSig=NULL,
                                   refXY=FALSE,
                                   makePlots=FALSE,
                                   returnElem=c("rfImp", "rfAcc")
                                   ) {
  # Run a random forest restricted to the top-nImp predictors (ranked by
  # variable importance in impDB) for a given dependent variable, returning
  # only the requested elements of the full-run result list.
  # FUNCTION ARGUMENTS
  # dfTrain: training data
  # yVar: y variable in dfTrain
  # dfTest: test data
  # impDB: tibble containing variable importance by dependent variable
  # nImp: use the top nImp variables by variable importance
  # otherX: include these additional x variables
  # isContVar: boolean, is this a continuous variable (regression)? FALSE means classification
  # useLabel: label for description
  # useSub: label for plot
  # rndTo: controls the rounding parameter for plots, passed to runFullRF
  #        (NULL means no rounding; -1L means make an estimate from underlying data)
  # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
  # nSig: number of significant digits for automatically calculated rounding parameter
  #       (NULL means calculate exactly)
  # refXY: controls the reference line parameter for plots, passed to runFullRF
  # makePlots: boolean, should plots be created?
  # returnElem: character vector of list elements to be returned
  # Top-ranked predictors for this y variable, plus any manually requested extras
  topVars <- impDB %>%
    filter(n <= nImp, src == yVar) %>%
    pull(metric)
  useX <- unique(c(topVars, otherX))
  # Delegate the full run, then keep only the requested list elements
  fullRes <- runFullRF(dfTrain=dfTrain,
                       yVar=yVar,
                       xVars=useX,
                       dfTest=dfTest,
                       isContVar=isContVar,
                       useLabel=useLabel,
                       useSub=useSub,
                       rndTo=rndTo,
                       rndBucketsAuto=rndBucketsAuto,
                       nSig=nSig,
                       refXY=refXY,
                       makePlots=makePlots,
                       returnData=TRUE
                       )
  fullRes[returnElem]
}
autoRound <- function(x, rndTo=-1L, rndBucketsAuto=100, nSig=NULL) {
  # Round each element of x to the nearest multiple of rndTo, optionally
  # choosing rndTo automatically from the spread of the data.
  # FUNCTION ARGUMENTS
  # x: numeric vector to be rounded
  # rndTo: every number in x should be rounded to the nearest rndTo
  #        NULL means no rounding
  #        -1L means make an estimate based on data (default)
  # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
  # nSig: number of significant digits for automatically calculated rounding parameter
  #       (NULL means calculate exactly)
  # If rndTo is passed as NULL, return x as-is
  if(is.null(rndTo)) return(x)
  # If rndTo is passed as -1L, make an estimate for rndTo
  if(isTRUE(all.equal(-1L, rndTo))) {
    # Get the number of unique values in x
    nUq <- length(unique(x))
    # If the number of unique values is no more than 150% of rndBucketsAuto, return as-is
    if(nUq <= 1.5*rndBucketsAuto) return(x)
    # Otherwise, calculate a value for rndTo
    # na.rm=TRUE so isolated NA values do not poison the automatic bucket-width
    # estimate (NA elements still come back as NA after rounding)
    rndTo <- diff(range(x, na.rm=TRUE)) / rndBucketsAuto
    # Truncate to requested number of significant digits
    if(!is.null(nSig)) rndTo <- signif(rndTo, digits=nSig)
  }
  # Guard against a zero bucket width (e.g. explicit rndTo=0), which would
  # otherwise produce NaN via division by zero
  if(isTRUE(all.equal(0, rndTo))) return(x)
  # Return the rounded vector if it was not already returned
  return(round(x/rndTo)*rndTo)
}
autoPartialImportance <- function(dfTrain,
                                  dfTest,
                                  yVar,
                                  isContVar,
                                  impDB=dfImp,
                                  impNums=c(1:10, 16, 25, nrow(filter(impDB, src==yVar)))
                                  ) {
  # Sweep the number of predictors used in a random forest (selected in order
  # of variable importance) and chart holdout accuracy / r-squared vs. count.
  # FUNCTION ARGUMENTS:
  # dfTrain: training data
  # dfTest: test (holdout) data
  # yVar: dependent variable
  # isContVar: boolean, is this a continuous variable (R-2) or categorical variable (accuracy)?
  # impDB: tibble containing sorted variable importances by predictor
  # impNums: vector of number of variables to run (each element in vector run)
  #          NOTE: the default now derives its final element from impDB
  #          (previously hard-coded to dfImp, silently ignoring a user-supplied
  #          impDB; identical under the default impDB=dfImp)
  # Accuracy (or r-squared) on holdout data for each requested predictor count
  tblRPI <- tibble::tibble(nImp=impNums,
                           rfAcc=sapply(impNums,
                                        FUN=function(x) {
                                          y <- runPartialImportanceRF(dfTrain=dfTrain,
                                                                      yVar=yVar,
                                                                      dfTest=dfTest,
                                                                      isContVar=isContVar,
                                                                      impDB=impDB,
                                                                      nImp=x,
                                                                      makePlots=FALSE
                                                                      )[["rfAcc"]]
                                          # Continuous targets return a vector; keep only r-squared
                                          if(isTRUE(isContVar)) y <- y["r2"]
                                          y
                                        }
                                        )
                           )
  print(tblRPI)
  # Plot of holdout accuracy/r-squared vs. number of variables
  if(isTRUE(isContVar)) prtDesc <- "R-squared" else prtDesc <- "Accuracy"
  p1 <- tblRPI %>%
    select(nImp, rfAcc) %>%
    bind_rows(tibble::tibble(nImp=0, rfAcc=0)) %>%  # anchor the curve at the origin
    ggplot(aes(x=nImp, y=rfAcc)) +
    geom_line() +
    geom_point() +
    labs(title=paste0(prtDesc, " on holdout data vs. number of predictors"),
         subtitle=paste0("Predicting ", yVar),
         y=paste0(prtDesc, " on holdout data"),
         x="# Predictors (selected in order of variable importance in full model)"
         ) +
    lims(y=c(0, 1)) +
    # Dashed reference line at the best observed value
    geom_hline(data=~filter(., rfAcc==max(rfAcc)), aes(yintercept=rfAcc), lty=2)
  print(p1)
  return(tblRPI)
}
runNextBestPredictor <- function(varsRun,
xFix,
yVar,
isContVar,
dfTrain,
dfTest=dfTrain,
useLabel="predictions based on training data applied to holdout dataset",
useSub=stringr::str_to_sentence(keyLabel_v3),
makePlots=FALSE
) {
# Greedy forward-selection step: for each candidate in varsRun, fit a random
# forest on xFix plus that one candidate, and collect holdout accuracy /
# r-squared so the best "next" predictor can be identified.
# FUNCTION ARGUMENTS:
# varsRun: variables to be run as potential next-best predictors
# xFix: variables that are already included in every test of next-best
# yVar: dependent variable of interest
# isContVar: boolean, is yVar continuous?
# dfTrain: training data
# dfTest: test data (defaults to dfTrain, i.e. in-sample accuracy)
# useLabel: descriptive label
# useSub: subtitle description
# makePlots: boolean, should plots be created for each predictor run?
# One full RF run per candidate; the result is a vector named by candidate
vecAcc <- sapply(varsRun, FUN=function(x) {
y <- runFullRF(dfTrain=dfTrain,
yVar=yVar,
xVars=c(xFix, x),
dfTest=dfTest,
useLabel=useLabel,
useSub=useSub,
isContVar=isContVar,
makePlots=makePlots,
returnData=TRUE
)[["rfAcc"]]
# Continuous targets: keep only the r-squared element of the accuracy result
if(isTRUE(isContVar)) y[["r2"]] else y
}
)
# Display candidates ranked best-first (print only; the return value below is
# unsorted). NOTE(review): the tibble::tibble() step appears to rewrap the
# data frame before arranging — confirm it splices columns on the installed
# tibble version rather than creating a df-column.
vecAcc %>%
as.data.frame() %>%
purrr::set_names("rfAcc") %>%
rownames_to_column("pred") %>%
tibble::tibble() %>%
arrange(desc(rfAcc)) %>%
print(n=40)
# Return the named accuracy/r-squared vector for downstream ranking
vecAcc
}
getNextBestVar <- function(x, returnTbl=FALSE, n=if(isTRUE(returnTbl)) +Inf else 1) {
  # Rank candidate predictors by their accuracy / r-squared score and return
  # either the ranking table or just the top-n predictor names.
  # FUNCTION ARGUMENTS:
  # x: named vector of accuracy or r-squared
  # returnTbl: boolean, if TRUE convert to tibble and return, if FALSE return vector of top-n predictors
  # n: number of predictors to return (+Inf will return the full tibble or vector)
  ranked <- vecToTibble(x, colNameName="pred") %>%
    arrange(desc(value)) %>%
    slice_head(n=n)
  # Either the full ranking table or just the predictor names
  if(isTRUE(returnTbl)) ranked else pull(ranked, pred)
}
newCityPredict <- function(rf,
                           dfTest,
                           trueCol,
                           isContVar=FALSE,
                           reportR2=isTRUE(isContVar),
                           plotCont=isTRUE(isContVar),
                           reportAcc=TRUE,
                           rndReport=2,
                           useLabel="requested data",
                           useTitle=NULL,
                           useSub=NULL,
                           rndTo=NULL,
                           rndBucketsAuto=100,
                           nSig=NULL,
                           refXY=FALSE,
                           returnData=TRUE
                           ) {
  # Apply an already-fitted ranger model to a new dataset (e.g. a different
  # city), report accuracy, and draw the confusion / predicted-vs-actual chart.
  # FUNCTION ARGUMENTS:
  # rf: the existing "ranger" model OR a list containing element "rf" that has the existing "ranger" model
  # dfTest: the new dataset for predictions
  # trueCol: column containing true value
  # isContVar: boolean, is the variable continuous? (default FALSE means categorical)
  # reportR2: boolean, should accuracy be calculated as R-squared?
  #           (FALSE measures as categorical)
  # plotCont: boolean, should plotting assume continuous variables?
  #           (FALSE assumes confusion plot for categorical variables)
  # reportAcc: boolean, should accuracy be reported (printed to output)?
  # rndReport: number of significant digits for reporting (will be converted to percentage first)
  # useLabel: label for data to be used in reporting
  # useTitle: title to be used for chart (NULL means create from trueCol)
  # useSub: subtitle to be used for chart (NULL means none)
  # rndTo: every number in x should be rounded to the nearest rndTo
  #        NULL means no rounding (default)
  #        -1L means make an estimate based on data
  # rndBucketsAuto: integer, if rndTo is -1L, about how many buckets are desired for predictions?
  # nSig: number of significant digits for automatically calculated rounding parameter
  #       (NULL means calculate exactly)
  # refXY: boolean, should a reference line for y=x be included? (relevant only for continuous)
  # returnData: boolean, should a list be returned containing tstPred and rfAcc?
  # Validate/unwrap the model: accept a bare ranger object, or a list (as
  # returned by runFullRF with returnData=TRUE) holding one in element "rf".
  # Fixes the previously unterminated quote in the error message and uses
  # inherits() rather than matching on class(rf) directly.
  errMsg <- "\nERROR: rf must be of class 'ranger' OR a list with element 'rf' that is of class 'ranger'"
  if(!inherits(rf, "ranger")) {
    if(!("rf" %in% names(rf))) stop(errMsg)
    rf <- rf[["rf"]]
    if(!inherits(rf, "ranger")) stop(errMsg)
  }
  # Predict on new dataset
  tstPred <- predictRF(rf=rf, df=dfTest)
  # Report on accuracy
  rfAcc <- reportAccuracy(tstPred,
                          trueCol=trueCol,
                          reportAcc=reportAcc,
                          rndReport=rndReport,
                          useLabel=useLabel,
                          reportR2=reportR2,
                          returnAcc=TRUE
                          )
  # Plot confusion data
  plotConfusion(tstPred,
                trueCol=trueCol,
                useTitle=useTitle,
                useSub=useSub,
                plotCont=plotCont,
                rndTo=rndTo,
                rndBucketsAuto=rndBucketsAuto,
                nSig=nSig,
                refXY=refXY
                )
  # Return data if requested
  if(isTRUE(returnData)) return(list(tstPred=tstPred, rfAcc=rfAcc))
}
Key mapping tables for available metrics are also copied:
# Comma-separated list of hourly metric names accepted by the Open-Meteo
# historical API; element order must align with hourlyDescription below
hourlyMetrics <- "temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm"
# Comma-separated list of daily metric names; order must align with dailyDescription
dailyMetrics <- "weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration"
# Newline-separated descriptions for each hourly metric, copied verbatim from
# the Open-Meteo documentation (typos included). NOTE(review): the literal line
# break inside the string (after "...meters above ground. ") also acts as a
# separator when split on "\n" — the split count relies on it; do not rewrap.
hourlyDescription <- "Air temperature at 2 meters above ground\nRelative humidity at 2 meters above ground\nDew point temperature at 2 meters above ground\nApparent temperature is the perceived feels-like temperature combining wind chill factor, relative humidity and solar radiation\nAtmospheric air pressure reduced to mean sea level (msl) or pressure at surface. Typically pressure on mean sea level is used in meteorology. Surface pressure gets lower with increasing elevation.\nAtmospheric air pressure reduced to mean sea level (msl) or pressure at surface. Typically pressure on mean sea level is used in meteorology. Surface pressure gets lower with increasing elevation.\nTotal precipitation (rain, showers, snow) sum of the preceding hour. Data is stored with a 0.1 mm precision. If precipitation data is summed up to monthly sums, there might be small inconsistencies with the total precipitation amount.\nOnly liquid precipitation of the preceding hour including local showers and rain from large scale systems.\nSnowfall amount of the preceding hour in centimeters. For the water equivalent in millimeter, divide by 7. E.g. 7 cm snow = 10 mm precipitation water equivalent\nTotal cloud cover as an area fraction\nLow level clouds and fog up to 2 km altitude\nMid level clouds from 2 to 6 km altitude\nHigh level clouds from 6 km altitude\nShortwave solar radiation as average of the preceding hour. This is equal to the total global horizontal irradiation\nDirect solar radiation as average of the preceding hour on the horizontal plane and the normal plane (perpendicular to the sun)\nDirect solar radiation as average of the preceding hour on the horizontal plane and the normal plane (perpendicular to the sun)\nDiffuse solar radiation as average of the preceding hour\nWind speed at 10 or 100 meters above ground. Wind speed on 10 meters is the standard level.\nWind speed at 10 or 100 meters above ground. 
Wind speed on 10 meters is the standard level.\nWind direction at 10 or 100 meters above ground\nWind direction at 10 or 100 meters above ground\nGusts at 10 meters above ground of the indicated hour. Wind gusts in CERRA are defined as the maximum wind gusts of the preceding hour. Please consult the ECMWF IFS documentation for more information on how wind gusts are parameterized in weather models.\nET0 Reference Evapotranspiration of a well watered grass field. Based on FAO-56 Penman-Monteith equations ET0 is calculated from temperature, wind speed, humidity and solar radiation. Unlimited soil water is assumed. ET0 is commonly used to estimate the required irrigation for plants.\nWeather condition as a numeric code. Follow WMO weather interpretation codes. See table below for details. Weather code is calculated from cloud cover analysis, precipitation and snowfall. As barely no information about atmospheric stability is available, estimation about thunderstorms is not possible.\nVapor Pressure Deificit (VPD) in kilopascal (kPa). For high VPD (>1.6), water transpiration of plants increases. For low VPD (<0.4), transpiration decreases\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths."
# Newline-separated descriptions for each daily metric, copied verbatim from
# the Open-Meteo documentation (typos included)
dailyDescription <- "The most severe weather condition on a given day\nMaximum and minimum daily air temperature at 2 meters above ground\nMaximum and minimum daily air temperature at 2 meters above ground\nMaximum and minimum daily apparent temperature\nMaximum and minimum daily apparent temperature\nSum of daily precipitation (including rain, showers and snowfall)\nSum of daily rain\nSum of daily snowfall\nThe number of hours with rain\nSun rise and set times\nSun rise and set times\nMaximum wind speed and gusts on a day\nMaximum wind speed and gusts on a day\nDominant wind direction\nThe sum of solar radiaion on a given day in Megajoules\nDaily sum of ET0 Reference Evapotranspiration of a well watered grass field"
# Create tibble for hourly metrics: one row per metric with its description
tblMetricsHourly <- tibble::tibble(metric=str_split_1(hourlyMetrics, ","),
                                   description=str_split_1(hourlyDescription, "\n")
                                   )
print(tblMetricsHourly, n=50)
## # A tibble: 33 × 2
## metric description
## <chr> <chr>
## 1 temperature_2m Air temperature at 2 meters above ground
## 2 relativehumidity_2m Relative humidity at 2 meters above ground
## 3 dewpoint_2m Dew point temperature at 2 meters above ground
## 4 apparent_temperature Apparent temperature is the perceived feels-li…
## 5 pressure_msl Atmospheric air pressure reduced to mean sea l…
## 6 surface_pressure Atmospheric air pressure reduced to mean sea l…
## 7 precipitation Total precipitation (rain, showers, snow) sum …
## 8 rain Only liquid precipitation of the preceding hou…
## 9 snowfall Snowfall amount of the preceding hour in centi…
## 10 cloudcover Total cloud cover as an area fraction
## 11 cloudcover_low Low level clouds and fog up to 2 km altitude
## 12 cloudcover_mid Mid level clouds from 2 to 6 km altitude
## 13 cloudcover_high High level clouds from 6 km altitude
## 14 shortwave_radiation Shortwave solar radiation as average of the pr…
## 15 direct_radiation Direct solar radiation as average of the prece…
## 16 direct_normal_irradiance Direct solar radiation as average of the prece…
## 17 diffuse_radiation Diffuse solar radiation as average of the prec…
## 18 windspeed_10m Wind speed at 10 or 100 meters above ground. W…
## 19 windspeed_100m Wind speed at 10 or 100 meters above ground. W…
## 20 winddirection_10m Wind direction at 10 or 100 meters above ground
## 21 winddirection_100m Wind direction at 10 or 100 meters above ground
## 22 windgusts_10m Gusts at 10 meters above ground of the indicat…
## 23 et0_fao_evapotranspiration ET0 Reference Evapotranspiration of a well wat…
## 24 weathercode Weather condition as a numeric code. Follow WM…
## 25 vapor_pressure_deficit Vapor Pressure Deificit (VPD) in kilopascal (k…
## 26 soil_temperature_0_to_7cm Average temperature of different soil levels b…
## 27 soil_temperature_7_to_28cm Average temperature of different soil levels b…
## 28 soil_temperature_28_to_100cm Average temperature of different soil levels b…
## 29 soil_temperature_100_to_255cm Average temperature of different soil levels b…
## 30 soil_moisture_0_to_7cm Average soil water content as volumetric mixin…
## 31 soil_moisture_7_to_28cm Average soil water content as volumetric mixin…
## 32 soil_moisture_28_to_100cm Average soil water content as volumetric mixin…
## 33 soil_moisture_100_to_255cm Average soil water content as volumetric mixin…
# Create tibble for daily metrics: one row per metric with its description
tblMetricsDaily <- tibble::tibble(metric=str_split_1(dailyMetrics, ","),
                                  description=str_split_1(dailyDescription, "\n")
                                  )
tblMetricsDaily
## # A tibble: 16 × 2
## metric description
## <chr> <chr>
## 1 weathercode The most severe weather condition on a given day
## 2 temperature_2m_max Maximum and minimum daily air temperature at 2 me…
## 3 temperature_2m_min Maximum and minimum daily air temperature at 2 me…
## 4 apparent_temperature_max Maximum and minimum daily apparent temperature
## 5 apparent_temperature_min Maximum and minimum daily apparent temperature
## 6 precipitation_sum Sum of daily precipitation (including rain, showe…
## 7 rain_sum Sum of daily rain
## 8 snowfall_sum Sum of daily snowfall
## 9 precipitation_hours The number of hours with rain
## 10 sunrise Sun rise and set times
## 11 sunset Sun rise and set times
## 12 windspeed_10m_max Maximum wind speed and gusts on a day
## 13 windgusts_10m_max Maximum wind speed and gusts on a day
## 14 winddirection_10m_dominant Dominant wind direction
## 15 shortwave_radiation_sum The sum of solar radiaion on a given day in Megaj…
## 16 et0_fao_evapotranspiration Daily sum of ET0 Reference Evapotranspiration of …
A function is written to process saved data for later use:
formatOpenMeteoJSON <- function(x,
glimpseData=TRUE,
addVars=FALSE,
addExtract="tblHourly",
showStats=addVars
) {
# Read a saved Open-Meteo JSON file and (optionally) enrich the extracted
# table with calendar / time-of-day variables and percentile-rank columns.
# FUNCTION ARGUMENTS:
# x: saved json file for passage to readOpenMeteoJSON
# glimpseData: boolean, should a glimpse of the file and metadata be shown?
# addVars: boolean, should variables be added for later processing?
# addExtract: list element to be extracted (relevant only for addVars=TRUE)
# showStats: boolean, should counts of key elements be shown (relevant only for addVars=TRUE)
# Read file
lst <- readOpenMeteoJSON(x)
# Show a glimpse if requested
if(isTRUE(glimpseData)) {
print(lst)
prettyOpenMeteoMeta(lst)
}
# If no variables to be added, return the full list (not just the extract)
if(!isTRUE(addVars)) return(lst)
# Add derived variables to the extracted table:
# - calendar fields (year, month, hour, fct_hour, doy)
# - tod: "Day" for hours 7 through 18 inclusive, otherwise "Night"
# - season: meteorological season from month ("typo" is a defensive catch-all)
# - todSeason: season-by-time-of-day interaction; built while season/tod are
#   still character, BEFORE they are converted to factors (step order matters)
# - pct_*: percentile rank (0-100) of every numeric column — this includes
#   the calendar fields themselves (year, hour, doy), not just weather metrics
df <- lst[[addExtract]] %>%
mutate(year=year(date),
month=factor(month.abb[lubridate::month(date)], levels=month.abb),
hour=lubridate::hour(time),
fct_hour=factor(hour),
tod=ifelse(hour>=7 & hour<=18, "Day", "Night"),
doy=yday(date),
season=case_when(month %in% c("Mar", "Apr", "May") ~ "Spring",
month %in% c("Jun", "Jul", "Aug") ~ "Summer",
month %in% c("Sep", "Oct", "Nov") ~ "Fall",
month %in% c("Dec", "Jan", "Feb") ~ "Winter",
TRUE~"typo"
),
todSeason=paste0(season, "-", tod),
tod=factor(tod, levels=c("Day", "Night")),
season=factor(season, levels=c("Spring", "Summer", "Fall", "Winter")),
todSeason=factor(todSeason,
levels=paste0(rep(c("Spring", "Summer", "Fall", "Winter"), each=2),
"-",
c("Day", "Night")
)
),
across(where(is.numeric), .fns=function(x) round(100*percent_rank(x)), .names="pct_{.col}")
)
# Show counts if requested
if(isTRUE(showStats)) {
# Glimpse file
glimpse(df)
# Counts of day-of-year/month (box plot weighted by record count)
p1 <- df %>%
count(doy, month) %>%
ggplot(aes(y=doy, x=month)) +
geom_boxplot(aes(weight=n), fill="lightblue") +
labs(title="Observations by day-of-year and month", x=NULL, y="Day of Year")
print(p1)
# Counts of year/month (heat map with printed counts)
p2 <- df %>%
count(year, month) %>%
ggplot(aes(y=factor(year), x=month)) +
geom_tile(aes(fill=n)) +
geom_text(aes(label=n), size=3) +
scale_fill_continuous("# Records", low="white", high="green") +
labs(title="Records by year and month", x=NULL, y=NULL)
print(p2)
# Counts of todSeason-season-tod, hour-fct_hour-tod, and month-season
df %>% count(todSeason, season, tod) %>% print()
df %>% count(hour, fct_hour, tod) %>% print(n=30)
df %>% count(month, season) %>% print()
}
# Return the enriched table (a tibble, NOT the full list read from JSON)
df
}
Core daily datasets are loaded:
# Read daily JSON file: New York City daily history (metadata glimpse only;
# addVars defaults to FALSE, so no derived columns are added)
nycOMDaily <- formatOpenMeteoJSON("testOM_daily_nyc.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily
##
## $tblDaily
## # A tibble: 4,914 × 18
## date time weathercode temperature_2m_max temperature_2m_min
## <date> <chr> <int> <dbl> <dbl>
## 1 2010-01-01 2010-01-01 73 5 -1.4
## 2 2010-01-02 2010-01-02 71 -0.6 -9.2
## 3 2010-01-03 2010-01-03 71 -4.8 -10
## 4 2010-01-04 2010-01-04 1 -0.8 -7.3
## 5 2010-01-05 2010-01-05 1 -0.2 -7.3
## 6 2010-01-06 2010-01-06 2 1.1 -5.3
## 7 2010-01-07 2010-01-07 2 3.6 -3.7
## 8 2010-01-08 2010-01-08 71 1.9 -5.7
## 9 2010-01-09 2010-01-09 0 -1.4 -7.7
## 10 2010-01-10 2010-01-10 0 -1.7 -10.3
## # ℹ 4,904 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## # apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## # snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## # windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## # winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## # et0_fao_evapotranspiration <dbl>
##
## $tblHourly
## NULL
##
## $tblUnits
## # A tibble: 17 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 daily_units time "iso8601" <NA>
## 2 daily_units weathercode "wmo code" The most severe weather co…
## 3 daily_units temperature_2m_max "deg C" Maximum and minimum daily …
## 4 daily_units temperature_2m_min "deg C" Maximum and minimum daily …
## 5 daily_units apparent_temperature_max "deg C" Maximum and minimum daily …
## 6 daily_units apparent_temperature_min "deg C" Maximum and minimum daily …
## 7 daily_units precipitation_sum "mm" Sum of daily precipitation…
## 8 daily_units rain_sum "mm" Sum of daily rain
## 9 daily_units snowfall_sum "cm" Sum of daily snowfall
## 10 daily_units precipitation_hours "h" The number of hours with r…
## 11 daily_units sunrise "iso8601" Sun rise and set times
## 12 daily_units sunset "iso8601" Sun rise and set times
## 13 daily_units windspeed_10m_max "km/h" Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max "km/h" Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg " Dominant wind direction
## 16 daily_units shortwave_radiation_sum "MJ/m²" The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm" Daily sum of ET0 Reference…
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 40.7 -73.9 101. -14400 America/New_York
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 40.7
## longitude: -73.9
## generationtime_ms: 100.914
## utc_offset_seconds: -14400
## timezone: America/New_York
## timezone_abbreviation: EDT
## elevation: 36
# Los Angeles daily history (glimpse only; no derived variables added)
laxOMDaily <- formatOpenMeteoJSON("testOM_daily_lax.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily
##
## $tblDaily
## # A tibble: 5,113 × 18
## date time weathercode temperature_2m_max temperature_2m_min
## <date> <chr> <int> <dbl> <dbl>
## 1 2010-01-01 2010-01-01 2 20.1 4.7
## 2 2010-01-02 2010-01-02 1 23.2 6.7
## 3 2010-01-03 2010-01-03 1 23 6.5
## 4 2010-01-04 2010-01-04 2 22.1 6.5
## 5 2010-01-05 2010-01-05 1 22.9 5
## 6 2010-01-06 2010-01-06 2 23.2 7.7
## 7 2010-01-07 2010-01-07 1 23.3 5.2
## 8 2010-01-08 2010-01-08 1 22.8 8.4
## 9 2010-01-09 2010-01-09 2 21.5 7.2
## 10 2010-01-10 2010-01-10 1 24 7.5
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## # apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## # snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## # windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## # winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## # et0_fao_evapotranspiration <dbl>
##
## $tblHourly
## NULL
##
## $tblUnits
## # A tibble: 17 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 daily_units time "iso8601" <NA>
## 2 daily_units weathercode "wmo code" The most severe weather co…
## 3 daily_units temperature_2m_max "deg C" Maximum and minimum daily …
## 4 daily_units temperature_2m_min "deg C" Maximum and minimum daily …
## 5 daily_units apparent_temperature_max "deg C" Maximum and minimum daily …
## 6 daily_units apparent_temperature_min "deg C" Maximum and minimum daily …
## 7 daily_units precipitation_sum "mm" Sum of daily precipitation…
## 8 daily_units rain_sum "mm" Sum of daily rain
## 9 daily_units snowfall_sum "cm" Sum of daily snowfall
## 10 daily_units precipitation_hours "h" The number of hours with r…
## 11 daily_units sunrise "iso8601" Sun rise and set times
## 12 daily_units sunset "iso8601" Sun rise and set times
## 13 daily_units windspeed_10m_max "km/h" Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max "km/h" Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg " Dominant wind direction
## 16 daily_units shortwave_radiation_sum "MJ/m²" The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm" Daily sum of ET0 Reference…
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 34.1 -118. 58.9 -25200 America/Los_Angeles
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 34.13005
## longitude: -118.4981
## generationtime_ms: 58.85398
## utc_offset_seconds: -25200
## timezone: America/Los_Angeles
## timezone_abbreviation: PDT
## elevation: 333
# Chicago daily history (glimpse only; no derived variables added)
chiOMDaily <- formatOpenMeteoJSON("testOM_daily_chi.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily
##
## $tblDaily
## # A tibble: 5,113 × 18
## date time weathercode temperature_2m_max temperature_2m_min
## <date> <chr> <int> <dbl> <dbl>
## 1 2010-01-01 2010-01-01 3 -8.6 -13.4
## 2 2010-01-02 2010-01-02 2 -10.4 -15.1
## 3 2010-01-03 2010-01-03 3 -7.9 -13.8
## 4 2010-01-04 2010-01-04 3 -6.9 -12.3
## 5 2010-01-05 2010-01-05 3 -4.8 -9.8
## 6 2010-01-06 2010-01-06 71 -4.9 -9
## 7 2010-01-07 2010-01-07 73 -5.2 -8.5
## 8 2010-01-08 2010-01-08 73 -3 -9.4
## 9 2010-01-09 2010-01-09 3 -5.8 -12.3
## 10 2010-01-10 2010-01-10 3 -8.8 -19.4
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## # apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## # snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## # windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## # winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## # et0_fao_evapotranspiration <dbl>
##
## $tblHourly
## NULL
##
## $tblUnits
## # A tibble: 17 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 daily_units time "iso8601" <NA>
## 2 daily_units weathercode "wmo code" The most severe weather co…
## 3 daily_units temperature_2m_max "deg C" Maximum and minimum daily …
## 4 daily_units temperature_2m_min "deg C" Maximum and minimum daily …
## 5 daily_units apparent_temperature_max "deg C" Maximum and minimum daily …
## 6 daily_units apparent_temperature_min "deg C" Maximum and minimum daily …
## 7 daily_units precipitation_sum "mm" Sum of daily precipitation…
## 8 daily_units rain_sum "mm" Sum of daily rain
## 9 daily_units snowfall_sum "cm" Sum of daily snowfall
## 10 daily_units precipitation_hours "h" The number of hours with r…
## 11 daily_units sunrise "iso8601" Sun rise and set times
## 12 daily_units sunset "iso8601" Sun rise and set times
## 13 daily_units windspeed_10m_max "km/h" Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max "km/h" Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg " Dominant wind direction
## 16 daily_units shortwave_radiation_sum "MJ/m²" The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm" Daily sum of ET0 Reference…
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 41.9 -87.6 59.4 -18000 America/Chicago
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 41.86292
## longitude: -87.64877
## generationtime_ms: 59.38601
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
# Houston daily history (glimpse only; no derived variables added)
houOMDaily <- formatOpenMeteoJSON("testOM_daily_hou.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily
##
## $tblDaily
## # A tibble: 5,113 × 18
## date time weathercode temperature_2m_max temperature_2m_min
## <date> <chr> <int> <dbl> <dbl>
## 1 2010-01-01 2010-01-01 3 11.8 3.9
## 2 2010-01-02 2010-01-02 1 12 0.7
## 3 2010-01-03 2010-01-03 3 10 4.4
## 4 2010-01-04 2010-01-04 3 7.6 1.8
## 5 2010-01-05 2010-01-05 0 8 -1.9
## 6 2010-01-06 2010-01-06 51 12.7 -0.1
## 7 2010-01-07 2010-01-07 55 13.4 -0.2
## 8 2010-01-08 2010-01-08 2 0.8 -3
## 9 2010-01-09 2010-01-09 0 4.4 -5.5
## 10 2010-01-10 2010-01-10 0 5.9 -4.6
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## # apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## # snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## # windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## # winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## # et0_fao_evapotranspiration <dbl>
##
## $tblHourly
## NULL
##
## $tblUnits
## # A tibble: 17 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 daily_units time "iso8601" <NA>
## 2 daily_units weathercode "wmo code" The most severe weather co…
## 3 daily_units temperature_2m_max "deg C" Maximum and minimum daily …
## 4 daily_units temperature_2m_min "deg C" Maximum and minimum daily …
## 5 daily_units apparent_temperature_max "deg C" Maximum and minimum daily …
## 6 daily_units apparent_temperature_min "deg C" Maximum and minimum daily …
## 7 daily_units precipitation_sum "mm" Sum of daily precipitation…
## 8 daily_units rain_sum "mm" Sum of daily rain
## 9 daily_units snowfall_sum "cm" Sum of daily snowfall
## 10 daily_units precipitation_hours "h" The number of hours with r…
## 11 daily_units sunrise "iso8601" Sun rise and set times
## 12 daily_units sunset "iso8601" Sun rise and set times
## 13 daily_units windspeed_10m_max "km/h" Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max "km/h" Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg " Dominant wind direction
## 16 daily_units shortwave_radiation_sum "MJ/m²" The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm" Daily sum of ET0 Reference…
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 29.8 -95.4 64.0 -18000 US/Central
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 29.77153
## longitude: -95.43555
## generationtime_ms: 63.96198
## utc_offset_seconds: -18000
## timezone: US/Central
## timezone_abbreviation: CDT
## elevation: 17
Processed hourly data for NYC and LA are loaded:
# Read hourly JSON file (NYC and LA)
# addVars=TRUE extracts tblHourly and appends calendar / percentile columns
nycTemp <- formatOpenMeteoJSON("testOM_hourly_nyc.json", addVars=TRUE)
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly
##
## $tblDaily
## NULL
##
## $tblHourly
## # A tibble: 117,936 × 37
## time date hour temperature_2m relativehumidity_2m
## <dttm> <date> <int> <dbl> <int>
## 1 2010-01-01 00:00:00 2010-01-01 0 -1.1 95
## 2 2010-01-01 01:00:00 2010-01-01 1 -1 96
## 3 2010-01-01 02:00:00 2010-01-01 2 -1 96
## 4 2010-01-01 03:00:00 2010-01-01 3 -0.8 97
## 5 2010-01-01 04:00:00 2010-01-01 4 -0.9 97
## 6 2010-01-01 05:00:00 2010-01-01 5 -0.8 97
## 7 2010-01-01 06:00:00 2010-01-01 6 -0.7 97
## 8 2010-01-01 07:00:00 2010-01-01 7 -0.5 97
## 9 2010-01-01 08:00:00 2010-01-01 8 -0.6 97
## 10 2010-01-01 09:00:00 2010-01-01 9 -0.6 97
## # ℹ 117,926 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
##
## $tblUnits
## # A tibble: 34 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above …
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters abov…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters …
## 5 hourly_units apparent_temperature deg C Apparent temperature is the percei…
## 6 hourly_units pressure_msl hPa Atmospheric air pressure reduced t…
## 7 hourly_units surface_pressure hPa Atmospheric air pressure reduced t…
## 8 hourly_units precipitation mm Total precipitation (rain, showers…
## 9 hourly_units rain mm Only liquid precipitation of the p…
## 10 hourly_units snowfall cm Snowfall amount of the preceding h…
## # ℹ 24 more rows
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 40.7 -73.9 118. -14400 America/New_York
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 40.7
## longitude: -73.9
## generationtime_ms: 118.0021
## utc_offset_seconds: -14400
## timezone: America/New_York
## timezone_abbreviation: EDT
## elevation: 36
##
## Rows: 117,936
## Columns: 80
## $ time <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m <dbl> -1.1, -1.0, -1.0, -0.8, -0.9, -0.8, …
## $ relativehumidity_2m <int> 95, 96, 96, 97, 97, 97, 97, 97, 97, …
## $ dewpoint_2m <dbl> -1.7, -1.6, -1.6, -1.2, -1.3, -1.2, …
## $ apparent_temperature <dbl> -3.9, -3.9, -3.9, -3.7, -3.7, -3.6, …
## $ pressure_msl <dbl> 1017.2, 1016.5, 1015.9, 1015.6, 1015…
## $ surface_pressure <dbl> 1012.6, 1011.9, 1011.3, 1011.0, 1011…
## $ precipitation <dbl> 0.5, 0.5, 0.4, 0.3, 0.1, 0.0, 0.0, 0…
## $ rain <dbl> 0.0, 0.1, 0.1, 0.1, 0.0, 0.0, 0.0, 0…
## $ snowfall <dbl> 0.35, 0.28, 0.21, 0.14, 0.07, 0.00, …
## $ cloudcover <int> 90, 93, 80, 68, 71, 100, 100, 100, 1…
## $ cloudcover_low <int> 2, 8, 3, 6, 15, 51, 99, 99, 96, 77, …
## $ cloudcover_mid <int> 98, 96, 99, 98, 95, 97, 98, 99, 94, …
## $ cloudcover_high <int> 97, 93, 59, 13, 0, 0, 0, 0, 0, 0, 0,…
## $ shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 53, 11…
## $ direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 20…
## $ direct_normal_irradiance <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 41, 93…
## $ windspeed_10m <dbl> 3.1, 3.5, 3.3, 3.9, 3.5, 3.4, 0.0, 1…
## $ windspeed_100m <dbl> 3.8, 3.1, 3.8, 4.7, 6.4, 5.7, 1.4, 1…
## $ winddirection_10m <int> 339, 336, 347, 338, 336, 342, 180, 2…
## $ winddirection_100m <int> 41, 21, 17, 356, 344, 342, 360, 217,…
## $ windgusts_10m <dbl> 9.0, 9.7, 10.1, 7.6, 7.6, 6.8, 5.4, …
## $ et0_fao_evapotranspiration <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, …
## $ weathercode <int> 73, 73, 73, 71, 71, 3, 3, 3, 3, 3, 3…
## $ vapor_pressure_deficit <dbl> 0.03, 0.02, 0.02, 0.02, 0.02, 0.02, …
## $ soil_temperature_0_to_7cm <dbl> -0.7, -0.7, -0.7, -0.6, -0.6, -0.6, …
## $ soil_temperature_7_to_28cm <dbl> 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0…
## $ soil_temperature_28_to_100cm <dbl> 4.2, 4.2, 4.1, 4.1, 4.1, 4.1, 4.1, 4…
## $ soil_temperature_100_to_255cm <dbl> 10.6, 10.6, 10.6, 10.6, 10.6, 10.6, …
## $ soil_moisture_0_to_7cm <dbl> 0.373, 0.374, 0.376, 0.377, 0.377, 0…
## $ soil_moisture_7_to_28cm <dbl> 0.377, 0.377, 0.377, 0.377, 0.377, 0…
## $ soil_moisture_28_to_100cm <dbl> 0.413, 0.413, 0.413, 0.413, 0.413, 0…
## $ soil_moisture_100_to_255cm <dbl> 0.412, 0.412, 0.412, 0.412, 0.412, 0…
## $ origTime <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod <fct> Night, Night, Night, Night, Night, N…
## $ doy <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m <dbl> 10, 10, 10, 11, 11, 11, 11, 12, 11, …
## $ pct_relativehumidity_2m <dbl> 92, 94, 94, 96, 96, 96, 96, 96, 96, …
## $ pct_dewpoint_2m <dbl> 23, 24, 24, 25, 25, 25, 25, 25, 25, …
## $ pct_apparent_temperature <dbl> 15, 15, 15, 15, 15, 15, 17, 17, 16, …
## $ pct_pressure_msl <dbl> 53, 49, 46, 44, 44, 41, 38, 36, 37, …
## $ pct_surface_pressure <dbl> 51, 47, 44, 42, 42, 39, 36, 35, 36, …
## $ pct_precipitation <dbl> 93, 93, 92, 90, 86, 0, 0, 0, 0, 0, 0…
## $ pct_rain <dbl> 0, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall <dbl> 99, 99, 99, 99, 98, 0, 0, 0, 0, 0, 0…
## $ pct_cloudcover <dbl> 77, 79, 72, 67, 68, 81, 81, 81, 81, …
## $ pct_cloudcover_low <dbl> 51, 60, 53, 58, 65, 77, 90, 90, 88, …
## $ pct_cloudcover_mid <dbl> 90, 89, 92, 90, 88, 89, 90, 92, 87, …
## $ pct_cloudcover_high <dbl> 81, 76, 63, 49, 0, 0, 0, 0, 0, 0, 0,…
## $ pct_shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 57, 6…
## $ pct_direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 62…
## $ pct_direct_normal_irradiance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 61…
## $ pct_diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 58, 7…
## $ pct_windspeed_10m <dbl> 3, 4, 3, 5, 4, 4, 0, 1, 2, 5, 8, 8, …
## $ pct_windspeed_100m <dbl> 2, 1, 2, 3, 6, 5, 0, 0, 4, 9, 9, 8, …
## $ pct_winddirection_10m <dbl> 94, 93, 96, 94, 93, 95, 35, 43, 53, …
## $ pct_winddirection_100m <dbl> 8, 4, 3, 99, 96, 95, 100, 46, 51, 61…
## $ pct_windgusts_10m <dbl> 3, 4, 5, 1, 1, 1, 0, 0, 0, 1, 2, 4, …
## $ pct_et0_fao_evapotranspiration <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 32, 4…
## $ pct_weathercode <dbl> 99, 99, 99, 98, 98, 69, 69, 69, 69, …
## $ pct_vapor_pressure_deficit <dbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 8, …
## $ pct_soil_temperature_0_to_7cm <dbl> 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10,…
## $ pct_soil_temperature_7_to_28cm <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, …
## $ pct_soil_temperature_28_to_100cm <dbl> 16, 16, 15, 15, 15, 15, 15, 15, 15, …
## $ pct_soil_temperature_100_to_255cm <dbl> 42, 42, 42, 42, 42, 42, 42, 42, 42, …
## $ pct_soil_moisture_0_to_7cm <dbl> 70, 71, 73, 74, 74, 74, 74, 74, 73, …
## $ pct_soil_moisture_7_to_28cm <dbl> 69, 69, 69, 69, 69, 68, 68, 68, 68, …
## $ pct_soil_moisture_28_to_100cm <dbl> 96, 96, 96, 96, 96, 96, 96, 96, 96, …
## $ pct_soil_moisture_100_to_255cm <dbl> 96, 96, 96, 96, 96, 96, 96, 96, 96, …
## $ pct_year <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## # A tibble: 8 × 4
## todSeason season tod n
## <fct> <fct> <fct> <int>
## 1 Spring-Day Spring Day 15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day Summer Day 14532
## 4 Summer-Night Summer Night 14532
## 5 Fall-Day Fall Day 14196
## 6 Fall-Night Fall Night 14196
## 7 Winter-Day Winter Day 14784
## 8 Winter-Night Winter Night 14784
## # A tibble: 24 × 4
## hour fct_hour tod n
## <int> <fct> <fct> <int>
## 1 0 0 Night 4914
## 2 1 1 Night 4914
## 3 2 2 Night 4914
## 4 3 3 Night 4914
## 5 4 4 Night 4914
## 6 5 5 Night 4914
## 7 6 6 Night 4914
## 8 7 7 Day 4914
## 9 8 8 Day 4914
## 10 9 9 Day 4914
## 11 10 10 Day 4914
## 12 11 11 Day 4914
## 13 12 12 Day 4914
## 14 13 13 Day 4914
## 15 14 14 Day 4914
## 16 15 15 Day 4914
## 17 16 16 Day 4914
## 18 17 17 Day 4914
## 19 18 18 Day 4914
## 20 19 19 Night 4914
## 21 20 20 Night 4914
## 22 21 21 Night 4914
## 23 22 22 Night 4914
## 24 23 23 Night 4914
## # A tibble: 12 × 3
## month season n
## <fct> <fct> <int>
## 1 Jan Winter 10416
## 2 Feb Winter 9480
## 3 Mar Spring 10416
## 4 Apr Spring 10080
## 5 May Spring 10416
## 6 Jun Summer 9720
## 7 Jul Summer 9672
## 8 Aug Summer 9672
## 9 Sep Fall 9360
## 10 Oct Fall 9672
## 11 Nov Fall 9360
## 12 Dec Winter 9672
laxTemp <- formatOpenMeteoJSON("testOM_hourly_lax.json", addVars=TRUE)
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly
##
## $tblDaily
## NULL
##
## $tblHourly
## # A tibble: 122,712 × 37
## time date hour temperature_2m relativehumidity_2m
## <dttm> <date> <int> <dbl> <int>
## 1 2010-01-01 00:00:00 2010-01-01 0 6.3 60
## 2 2010-01-01 01:00:00 2010-01-01 1 5.7 62
## 3 2010-01-01 02:00:00 2010-01-01 2 5.3 63
## 4 2010-01-01 03:00:00 2010-01-01 3 5 64
## 5 2010-01-01 04:00:00 2010-01-01 4 4.8 64
## 6 2010-01-01 05:00:00 2010-01-01 5 4.7 64
## 7 2010-01-01 06:00:00 2010-01-01 6 4.7 64
## 8 2010-01-01 07:00:00 2010-01-01 7 4.8 64
## 9 2010-01-01 08:00:00 2010-01-01 8 5.2 64
## 10 2010-01-01 09:00:00 2010-01-01 9 6.3 63
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
##
## $tblUnits
## # A tibble: 34 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above …
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters abov…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters …
## 5 hourly_units apparent_temperature deg C Apparent temperature is the percei…
## 6 hourly_units pressure_msl hPa Atmospheric air pressure reduced t…
## 7 hourly_units surface_pressure hPa Atmospheric air pressure reduced t…
## 8 hourly_units precipitation mm Total precipitation (rain, showers…
## 9 hourly_units rain mm Only liquid precipitation of the p…
## 10 hourly_units snowfall cm Snowfall amount of the preceding h…
## # ℹ 24 more rows
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 34.1 -118. 6196. -25200 America/Los_Angeles
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 34.13005
## longitude: -118.4981
## generationtime_ms: 6196.377
## utc_offset_seconds: -25200
## timezone: America/Los_Angeles
## timezone_abbreviation: PDT
## elevation: 333
##
## Rows: 122,712
## Columns: 80
## $ time <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m <dbl> 6.3, 5.7, 5.3, 5.0, 4.8, 4.7, 4.7, 4…
## $ relativehumidity_2m <int> 60, 62, 63, 64, 64, 64, 64, 64, 64, …
## $ dewpoint_2m <dbl> -0.9, -1.0, -1.2, -1.3, -1.4, -1.4, …
## $ apparent_temperature <dbl> 2.9, 2.3, 1.8, 1.3, 1.0, 0.9, 0.9, 1…
## $ pressure_msl <dbl> 1026.5, 1026.1, 1025.7, 1025.7, 1024…
## $ surface_pressure <dbl> 985.7, 985.2, 984.8, 984.7, 983.9, 9…
## $ precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover <int> 14, 21, 23, 29, 31, 30, 29, 30, 31, …
## $ cloudcover_low <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover_mid <int> 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 2, 6, …
## $ cloudcover_high <int> 48, 71, 78, 95, 100, 99, 98, 99, 100…
## $ shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 142, …
## $ direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 27, 16…
## $ direct_normal_irradiance <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 115, …
## $ windspeed_10m <dbl> 7.4, 7.8, 8.0, 9.7, 9.7, 10.1, 10.0,…
## $ windspeed_100m <dbl> 10.4, 10.6, 11.0, 14.9, 14.8, 14.6, …
## $ winddirection_10m <int> 14, 13, 10, 15, 15, 17, 15, 13, 13, …
## $ winddirection_100m <int> 20, 24, 19, 20, 18, 20, 18, 18, 16, …
## $ windgusts_10m <dbl> 19.1, 19.1, 19.4, 19.8, 20.9, 21.6, …
## $ et0_fao_evapotranspiration <dbl> 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, …
## $ weathercode <int> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ vapor_pressure_deficit <dbl> 0.38, 0.35, 0.33, 0.31, 0.31, 0.31, …
## $ soil_temperature_0_to_7cm <dbl> 7.0, 6.6, 6.2, 5.8, 5.6, 5.4, 5.3, 5…
## $ soil_temperature_7_to_28cm <dbl> 10.8, 10.6, 10.3, 10.1, 9.9, 9.7, 9.…
## $ soil_temperature_28_to_100cm <dbl> 12.9, 12.9, 12.9, 12.9, 12.9, 12.9, …
## $ soil_temperature_100_to_255cm <dbl> 20.5, 20.5, 20.5, 20.5, 20.5, 20.5, …
## $ soil_moisture_0_to_7cm <dbl> 0.205, 0.205, 0.205, 0.205, 0.205, 0…
## $ soil_moisture_7_to_28cm <dbl> 0.251, 0.251, 0.251, 0.250, 0.250, 0…
## $ soil_moisture_28_to_100cm <dbl> 0.168, 0.168, 0.168, 0.168, 0.168, 0…
## $ soil_moisture_100_to_255cm <dbl> 0.165, 0.165, 0.165, 0.165, 0.165, 0…
## $ origTime <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod <fct> Night, Night, Night, Night, Night, N…
## $ doy <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m <dbl> 4, 3, 3, 2, 2, 2, 2, 2, 3, 4, 12, 34…
## $ pct_relativehumidity_2m <dbl> 52, 54, 55, 57, 57, 57, 57, 57, 57, …
## $ pct_dewpoint_2m <dbl> 15, 15, 15, 14, 14, 14, 14, 14, 15, …
## $ pct_apparent_temperature <dbl> 4, 3, 3, 2, 2, 2, 2, 2, 2, 4, 10, 28…
## $ pct_pressure_msl <dbl> 100, 100, 99, 99, 99, 99, 98, 98, 98…
## $ pct_surface_pressure <dbl> 99, 99, 99, 99, 98, 98, 97, 97, 97, …
## $ pct_precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover <dbl> 58, 63, 65, 71, 75, 73, 71, 73, 75, …
## $ pct_cloudcover_low <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover_mid <dbl> 0, 0, 0, 0, 76, 0, 0, 0, 78, 80, 78,…
## $ pct_cloudcover_high <dbl> 80, 84, 85, 91, 96, 95, 94, 95, 96, …
## $ pct_shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 59, 6…
## $ pct_direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 55, 6…
## $ pct_direct_normal_irradiance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 54, 6…
## $ pct_diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 86, 9…
## $ pct_windspeed_10m <dbl> 61, 64, 65, 77, 77, 79, 79, 79, 79, …
## $ pct_windspeed_100m <dbl> 60, 61, 63, 81, 80, 80, 79, 79, 78, …
## $ pct_winddirection_10m <dbl> 6, 5, 3, 7, 7, 8, 7, 5, 5, 5, 7, 9, …
## $ pct_winddirection_100m <dbl> 8, 10, 8, 8, 7, 8, 7, 7, 6, 4, 4, 4,…
## $ pct_windgusts_10m <dbl> 51, 51, 52, 53, 56, 58, 58, 59, 58, …
## $ pct_et0_fao_evapotranspiration <dbl> 34, 34, 34, 34, 34, 34, 34, 34, 34, …
## $ pct_weathercode <dbl> 0, 63, 63, 63, 63, 63, 63, 63, 63, 6…
## $ pct_vapor_pressure_deficit <dbl> 31, 29, 28, 27, 27, 27, 26, 27, 27, …
## $ pct_soil_temperature_0_to_7cm <dbl> 3, 3, 2, 2, 2, 1, 1, 1, 1, 2, 5, 15,…
## $ pct_soil_temperature_7_to_28cm <dbl> 6, 6, 5, 4, 4, 3, 3, 2, 2, 2, 2, 2, …
## $ pct_soil_temperature_28_to_100cm <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, …
## $ pct_soil_temperature_100_to_255cm <dbl> 64, 64, 64, 64, 64, 64, 64, 64, 64, …
## $ pct_soil_moisture_0_to_7cm <dbl> 83, 83, 83, 83, 83, 83, 83, 83, 83, …
## $ pct_soil_moisture_7_to_28cm <dbl> 87, 87, 87, 87, 87, 87, 87, 87, 87, …
## $ pct_soil_moisture_28_to_100cm <dbl> 56, 56, 56, 56, 56, 56, 56, 56, 56, …
## $ pct_soil_moisture_100_to_255cm <dbl> 34, 34, 34, 34, 34, 34, 34, 34, 34, …
## $ pct_year <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## # A tibble: 8 × 4
## todSeason season tod n
## <fct> <fct> <fct> <int>
## 1 Spring-Day Spring Day 15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day Summer Day 15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day Fall Day 15288
## 6 Fall-Night Fall Night 15288
## 7 Winter-Day Winter Day 15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
## hour fct_hour tod n
## <int> <fct> <fct> <int>
## 1 0 0 Night 5113
## 2 1 1 Night 5113
## 3 2 2 Night 5113
## 4 3 3 Night 5113
## 5 4 4 Night 5113
## 6 5 5 Night 5113
## 7 6 6 Night 5113
## 8 7 7 Day 5113
## 9 8 8 Day 5113
## 10 9 9 Day 5113
## 11 10 10 Day 5113
## 12 11 11 Day 5113
## 13 12 12 Day 5113
## 14 13 13 Day 5113
## 15 14 14 Day 5113
## 16 15 15 Day 5113
## 17 16 16 Day 5113
## 18 17 17 Day 5113
## 19 18 18 Day 5113
## 20 19 19 Night 5113
## 21 20 20 Night 5113
## 22 21 21 Night 5113
## 23 22 22 Night 5113
## 24 23 23 Night 5113
## # A tibble: 12 × 3
## month season n
## <fct> <fct> <int>
## 1 Jan Winter 10416
## 2 Feb Winter 9480
## 3 Mar Spring 10416
## 4 Apr Spring 10080
## 5 May Spring 10416
## 6 Jun Summer 10080
## 7 Jul Summer 10416
## 8 Aug Summer 10416
## 9 Sep Fall 10080
## 10 Oct Fall 10416
## 11 Nov Fall 10080
## 12 Dec Winter 10416
Processed hourly data for Chicago and Houston are loaded:
# Load the processed Chicago hourly observations from the Open-Meteo JSON export
# (addVars = TRUE appends the derived calendar/percentile columns)
chiTemp <- formatOpenMeteoJSON("testOM_hourly_chi.json", addVars = TRUE)
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly
##
## $tblDaily
## NULL
##
## $tblHourly
## # A tibble: 122,712 × 37
## time date hour temperature_2m relativehumidity_2m
## <dttm> <date> <int> <dbl> <int>
## 1 2010-01-01 00:00:00 2010-01-01 0 -9.5 67
## 2 2010-01-01 01:00:00 2010-01-01 1 -9.8 69
## 3 2010-01-01 02:00:00 2010-01-01 2 -10.3 73
## 4 2010-01-01 03:00:00 2010-01-01 3 -10.8 74
## 5 2010-01-01 04:00:00 2010-01-01 4 -11.3 75
## 6 2010-01-01 05:00:00 2010-01-01 5 -11.8 76
## 7 2010-01-01 06:00:00 2010-01-01 6 -12.3 77
## 8 2010-01-01 07:00:00 2010-01-01 7 -12.8 78
## 9 2010-01-01 08:00:00 2010-01-01 8 -13.2 79
## 10 2010-01-01 09:00:00 2010-01-01 9 -13.4 78
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
##
## $tblUnits
## # A tibble: 34 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above …
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters abov…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters …
## 5 hourly_units apparent_temperature deg C Apparent temperature is the percei…
## 6 hourly_units pressure_msl hPa Atmospheric air pressure reduced t…
## 7 hourly_units surface_pressure hPa Atmospheric air pressure reduced t…
## 8 hourly_units precipitation mm Total precipitation (rain, showers…
## 9 hourly_units rain mm Only liquid precipitation of the p…
## 10 hourly_units snowfall cm Snowfall amount of the preceding h…
## # ℹ 24 more rows
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 41.9 -87.6 4476. -18000 America/Chicago
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 41.86292
## longitude: -87.64877
## generationtime_ms: 4476.2
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
##
## Rows: 122,712
## Columns: 80
## $ time <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m <dbl> -9.5, -9.8, -10.3, -10.8, -11.3, -11…
## $ relativehumidity_2m <int> 67, 69, 73, 74, 75, 76, 77, 78, 79, …
## $ dewpoint_2m <dbl> -14.4, -14.4, -14.2, -14.5, -14.8, -…
## $ apparent_temperature <dbl> -15.8, -16.3, -16.8, -17.2, -17.7, -…
## $ pressure_msl <dbl> 1024.4, 1024.7, 1025.3, 1025.8, 1026…
## $ surface_pressure <dbl> 1000.8, 1001.1, 1001.6, 1002.1, 1002…
## $ precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover <int> 62, 47, 20, 15, 15, 19, 25, 22, 22, …
## $ cloudcover_low <int> 69, 52, 22, 17, 17, 21, 28, 25, 25, …
## $ cloudcover_mid <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, …
## $ cloudcover_high <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 119, …
## $ direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 69, 14…
## $ direct_normal_irradiance <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 50, 7…
## $ windspeed_10m <dbl> 18.7, 20.1, 19.9, 19.5, 19.0, 19.4, …
## $ windspeed_100m <dbl> 25.9, 28.4, 29.2, 29.8, 30.1, 30.0, …
## $ winddirection_10m <int> 298, 291, 290, 289, 289, 288, 287, 2…
## $ winddirection_100m <int> 299, 294, 294, 295, 295, 294, 295, 2…
## $ windgusts_10m <dbl> 33.8, 32.4, 34.2, 33.1, 31.3, 31.7, …
## $ et0_fao_evapotranspiration <dbl> 0.02, 0.01, 0.01, 0.01, 0.01, 0.01, …
## $ weathercode <int> 2, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, …
## $ vapor_pressure_deficit <dbl> 0.10, 0.09, 0.08, 0.07, 0.06, 0.06, …
## $ soil_temperature_0_to_7cm <dbl> -1.5, -1.6, -1.8, -1.9, -2.1, -2.3, …
## $ soil_temperature_7_to_28cm <dbl> -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, …
## $ soil_temperature_28_to_100cm <dbl> 2.4, 2.4, 2.4, 2.4, 2.3, 2.3, 2.3, 2…
## $ soil_temperature_100_to_255cm <dbl> 9.0, 9.0, 9.0, 9.0, 8.9, 8.9, 8.9, 8…
## $ soil_moisture_0_to_7cm <dbl> 0.295, 0.295, 0.294, 0.294, 0.294, 0…
## $ soil_moisture_7_to_28cm <dbl> 0.300, 0.300, 0.300, 0.300, 0.300, 0…
## $ soil_moisture_28_to_100cm <dbl> 0.334, 0.334, 0.334, 0.334, 0.334, 0…
## $ soil_moisture_100_to_255cm <dbl> 0.310, 0.310, 0.310, 0.310, 0.311, 0…
## $ origTime <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod <fct> Night, Night, Night, Night, Night, N…
## $ doy <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m <dbl> 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, …
## $ pct_relativehumidity_2m <dbl> 33, 37, 46, 48, 50, 52, 55, 57, 59, …
## $ pct_dewpoint_2m <dbl> 4, 4, 5, 4, 4, 4, 4, 4, 3, 3, 3, 4, …
## $ pct_apparent_temperature <dbl> 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, …
## $ pct_pressure_msl <dbl> 84, 85, 86, 88, 89, 89, 90, 91, 91, …
## $ pct_surface_pressure <dbl> 80, 81, 83, 85, 85, 86, 87, 89, 89, …
## $ pct_precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover <dbl> 62, 55, 33, 30, 30, 33, 37, 35, 35, …
## $ pct_cloudcover_low <dbl> 77, 74, 66, 64, 64, 66, 68, 67, 67, …
## $ pct_cloudcover_mid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0,…
## $ pct_cloudcover_high <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 63, 7…
## $ pct_direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 69, 7…
## $ pct_direct_normal_irradiance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 76, 8…
## $ pct_diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 59, 6…
## $ pct_windspeed_10m <dbl> 66, 72, 71, 70, 68, 69, 65, 63, 59, …
## $ pct_windspeed_100m <dbl> 59, 67, 69, 71, 72, 72, 67, 63, 61, …
## $ pct_winddirection_10m <dbl> 87, 85, 84, 84, 84, 84, 83, 83, 83, …
## $ pct_winddirection_100m <dbl> 86, 85, 85, 85, 85, 85, 85, 85, 84, …
## $ pct_windgusts_10m <dbl> 69, 65, 70, 67, 62, 63, 63, 61, 59, …
## $ pct_et0_fao_evapotranspiration <dbl> 27, 16, 16, 16, 16, 16, 16, 16, 16, …
## $ pct_weathercode <dbl> 55, 34, 0, 0, 0, 0, 34, 34, 34, 0, 3…
## $ pct_vapor_pressure_deficit <dbl> 17, 15, 12, 10, 7, 7, 5, 5, 5, 5, 5,…
## $ pct_soil_temperature_0_to_7cm <dbl> 9, 8, 7, 6, 6, 5, 4, 3, 3, 2, 2, 2, …
## $ pct_soil_temperature_7_to_28cm <dbl> 11, 11, 11, 11, 11, 11, 11, 11, 11, …
## $ pct_soil_temperature_28_to_100cm <dbl> 18, 18, 18, 18, 18, 18, 18, 18, 18, …
## $ pct_soil_temperature_100_to_255cm <dbl> 40, 40, 40, 40, 40, 40, 40, 40, 40, …
## $ pct_soil_moisture_0_to_7cm <dbl> 80, 80, 80, 80, 80, 80, 80, 80, 80, …
## $ pct_soil_moisture_7_to_28cm <dbl> 84, 84, 84, 84, 84, 84, 84, 84, 84, …
## $ pct_soil_moisture_28_to_100cm <dbl> 99, 99, 99, 99, 99, 99, 99, 98, 98, …
## $ pct_soil_moisture_100_to_255cm <dbl> 85, 85, 85, 85, 86, 86, 86, 86, 86, …
## $ pct_year <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## # A tibble: 8 × 4
## todSeason season tod n
## <fct> <fct> <fct> <int>
## 1 Spring-Day Spring Day 15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day Summer Day 15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day Fall Day 15288
## 6 Fall-Night Fall Night 15288
## 7 Winter-Day Winter Day 15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
## hour fct_hour tod n
## <int> <fct> <fct> <int>
## 1 0 0 Night 5113
## 2 1 1 Night 5113
## 3 2 2 Night 5113
## 4 3 3 Night 5113
## 5 4 4 Night 5113
## 6 5 5 Night 5113
## 7 6 6 Night 5113
## 8 7 7 Day 5113
## 9 8 8 Day 5113
## 10 9 9 Day 5113
## 11 10 10 Day 5113
## 12 11 11 Day 5113
## 13 12 12 Day 5113
## 14 13 13 Day 5113
## 15 14 14 Day 5113
## 16 15 15 Day 5113
## 17 16 16 Day 5113
## 18 17 17 Day 5113
## 19 18 18 Day 5113
## 20 19 19 Night 5113
## 21 20 20 Night 5113
## 22 21 21 Night 5113
## 23 22 22 Night 5113
## 24 23 23 Night 5113
## # A tibble: 12 × 3
## month season n
## <fct> <fct> <int>
## 1 Jan Winter 10416
## 2 Feb Winter 9480
## 3 Mar Spring 10416
## 4 Apr Spring 10080
## 5 May Spring 10416
## 6 Jun Summer 10080
## 7 Jul Summer 10416
## 8 Aug Summer 10416
## 9 Sep Fall 10080
## 10 Oct Fall 10416
## 11 Nov Fall 10080
## 12 Dec Winter 10416
houTemp <- formatOpenMeteoJSON("testOM_hourly_hou.json", addVars=TRUE)
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly
##
## $tblDaily
## NULL
##
## $tblHourly
## # A tibble: 122,712 × 37
## time date hour temperature_2m relativehumidity_2m
## <dttm> <date> <int> <dbl> <int>
## 1 2010-01-01 00:00:00 2010-01-01 0 10.9 93
## 2 2010-01-01 01:00:00 2010-01-01 1 9.9 92
## 3 2010-01-01 02:00:00 2010-01-01 2 8.6 88
## 4 2010-01-01 03:00:00 2010-01-01 3 7.7 86
## 5 2010-01-01 04:00:00 2010-01-01 4 7.2 85
## 6 2010-01-01 05:00:00 2010-01-01 5 6.8 84
## 7 2010-01-01 06:00:00 2010-01-01 6 6.4 82
## 8 2010-01-01 07:00:00 2010-01-01 7 5.9 83
## 9 2010-01-01 08:00:00 2010-01-01 8 5.6 83
## 10 2010-01-01 09:00:00 2010-01-01 9 5.5 82
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
##
## $tblUnits
## # A tibble: 34 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above …
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters abov…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters …
## 5 hourly_units apparent_temperature deg C Apparent temperature is the percei…
## 6 hourly_units pressure_msl hPa Atmospheric air pressure reduced t…
## 7 hourly_units surface_pressure hPa Atmospheric air pressure reduced t…
## 8 hourly_units precipitation mm Total precipitation (rain, showers…
## 9 hourly_units rain mm Only liquid precipitation of the p…
## 10 hourly_units snowfall cm Snowfall amount of the preceding h…
## # ℹ 24 more rows
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 29.8 -95.4 3762. -18000 US/Central
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 29.77153
## longitude: -95.43555
## generationtime_ms: 3762.283
## utc_offset_seconds: -18000
## timezone: US/Central
## timezone_abbreviation: CDT
## elevation: 17
##
## Rows: 122,712
## Columns: 80
## $ time <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m <dbl> 10.9, 9.9, 8.6, 7.7, 7.2, 6.8, 6.4, …
## $ relativehumidity_2m <int> 93, 92, 88, 86, 85, 84, 82, 83, 83, …
## $ dewpoint_2m <dbl> 9.8, 8.6, 6.7, 5.6, 4.8, 4.2, 3.6, 3…
## $ apparent_temperature <dbl> 7.4, 5.7, 4.1, 3.2, 2.9, 2.4, 2.2, 1…
## $ pressure_msl <dbl> 1025.2, 1025.9, 1026.8, 1027.1, 1027…
## $ surface_pressure <dbl> 1023.1, 1023.8, 1024.7, 1025.0, 1025…
## $ precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover <int> 90, 90, 88, 88, 89, 89, 86, 80, 90, …
## $ cloudcover_low <int> 100, 100, 98, 98, 99, 99, 96, 89, 10…
## $ cloudcover_mid <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover_high <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 89, 1…
## $ direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 28, 58…
## $ direct_normal_irradiance <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 61, 1…
## $ windspeed_10m <dbl> 24.0, 25.9, 25.3, 23.5, 20.9, 20.7, …
## $ windspeed_100m <dbl> 37.4, 39.1, 38.4, 35.4, 32.0, 31.2, …
## $ winddirection_10m <int> 330, 333, 336, 339, 341, 340, 347, 3…
## $ winddirection_100m <int> 332, 334, 337, 341, 343, 341, 347, 3…
## $ windgusts_10m <dbl> 44.3, 46.1, 46.8, 44.3, 41.0, 37.8, …
## $ et0_fao_evapotranspiration <dbl> 0.00, 0.01, 0.01, 0.01, 0.02, 0.02, …
## $ weathercode <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, …
## $ vapor_pressure_deficit <dbl> 0.10, 0.10, 0.14, 0.14, 0.16, 0.16, …
## $ soil_temperature_0_to_7cm <dbl> 11.9, 11.5, 11.0, 10.5, 10.1, 9.8, 9…
## $ soil_temperature_7_to_28cm <dbl> 12.3, 12.3, 12.2, 12.2, 12.1, 12.0, …
## $ soil_temperature_28_to_100cm <dbl> 14.2, 14.2, 14.2, 14.2, 14.2, 14.2, …
## $ soil_temperature_100_to_255cm <dbl> 20.9, 20.9, 20.9, 20.9, 20.9, 20.9, …
## $ soil_moisture_0_to_7cm <dbl> 0.462, 0.462, 0.462, 0.462, 0.462, 0…
## $ soil_moisture_7_to_28cm <dbl> 0.474, 0.474, 0.474, 0.474, 0.473, 0…
## $ soil_moisture_28_to_100cm <dbl> 0.498, 0.498, 0.498, 0.498, 0.498, 0…
## $ soil_moisture_100_to_255cm <dbl> 0.453, 0.453, 0.453, 0.453, 0.453, 0…
## $ origTime <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod <fct> Night, Night, Night, Night, Night, N…
## $ doy <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m <dbl> 12, 10, 8, 6, 6, 5, 5, 4, 4, 4, 4, 5…
## $ pct_relativehumidity_2m <dbl> 80, 77, 67, 63, 61, 59, 55, 57, 57, …
## $ pct_dewpoint_2m <dbl> 23, 21, 17, 15, 13, 12, 11, 10, 9, 9…
## $ pct_apparent_temperature <dbl> 11, 9, 6, 5, 5, 4, 4, 4, 4, 3, 3, 4,…
## $ pct_pressure_msl <dbl> 92, 93, 94, 95, 96, 97, 97, 97, 97, …
## $ pct_surface_pressure <dbl> 92, 93, 94, 95, 96, 97, 97, 97, 98, …
## $ pct_precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover <dbl> 80, 80, 79, 79, 79, 79, 78, 76, 80, …
## $ pct_cloudcover_low <dbl> 89, 89, 87, 87, 88, 88, 86, 84, 89, …
## $ pct_cloudcover_mid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover_high <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 59, 6…
## $ pct_direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 61, 6…
## $ pct_direct_normal_irradiance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 63, 6…
## $ pct_diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 60, 7…
## $ pct_windspeed_10m <dbl> 95, 97, 96, 94, 90, 89, 83, 79, 78, …
## $ pct_windspeed_100m <dbl> 96, 97, 97, 95, 90, 89, 82, 78, 76, …
## $ pct_winddirection_10m <dbl> 91, 92, 92, 93, 93, 93, 95, 98, 96, …
## $ pct_winddirection_100m <dbl> 92, 92, 93, 94, 94, 94, 96, 99, 97, …
## $ pct_windgusts_10m <dbl> 94, 96, 96, 94, 91, 87, 87, 84, 77, …
## $ pct_et0_fao_evapotranspiration <dbl> 0, 24, 24, 24, 32, 32, 32, 24, 24, 3…
## $ pct_weathercode <dbl> 69, 69, 69, 69, 69, 69, 69, 69, 69, …
## $ pct_vapor_pressure_deficit <dbl> 10, 10, 16, 16, 19, 19, 20, 19, 19, …
## $ pct_soil_temperature_0_to_7cm <dbl> 10, 9, 8, 7, 6, 6, 5, 4, 4, 4, 4, 5,…
## $ pct_soil_temperature_7_to_28cm <dbl> 6, 6, 6, 6, 6, 6, 5, 5, 5, 4, 4, 4, …
## $ pct_soil_temperature_28_to_100cm <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ pct_soil_temperature_100_to_255cm <dbl> 38, 38, 38, 38, 38, 38, 38, 38, 38, …
## $ pct_soil_moisture_0_to_7cm <dbl> 82, 82, 82, 82, 82, 82, 82, 82, 82, …
## $ pct_soil_moisture_7_to_28cm <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ pct_soil_moisture_28_to_100cm <dbl> 98, 98, 98, 98, 98, 98, 98, 98, 98, …
## $ pct_soil_moisture_100_to_255cm <dbl> 82, 82, 82, 82, 82, 82, 82, 82, 82, …
## $ pct_year <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## # A tibble: 8 × 4
## todSeason season tod n
## <fct> <fct> <fct> <int>
## 1 Spring-Day Spring Day 15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day Summer Day 15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day Fall Day 15288
## 6 Fall-Night Fall Night 15288
## 7 Winter-Day Winter Day 15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
## hour fct_hour tod n
## <int> <fct> <fct> <int>
## 1 0 0 Night 5113
## 2 1 1 Night 5113
## 3 2 2 Night 5113
## 4 3 3 Night 5113
## 5 4 4 Night 5113
## 6 5 5 Night 5113
## 7 6 6 Night 5113
## 8 7 7 Day 5113
## 9 8 8 Day 5113
## 10 9 9 Day 5113
## 11 10 10 Day 5113
## 12 11 11 Day 5113
## 13 12 12 Day 5113
## 14 13 13 Day 5113
## 15 14 14 Day 5113
## 16 15 15 Day 5113
## 17 16 16 Day 5113
## 18 17 17 Day 5113
## 19 18 18 Day 5113
## 20 19 19 Night 5113
## 21 20 20 Night 5113
## 22 21 21 Night 5113
## 23 22 22 Night 5113
## 24 23 23 Night 5113
## # A tibble: 12 × 3
## month season n
## <fct> <fct> <int>
## 1 Jan Winter 10416
## 2 Feb Winter 9480
## 3 Mar Spring 10416
## 4 Apr Spring 10080
## 5 May Spring 10416
## 6 Jun Summer 10080
## 7 Jul Summer 10416
## 8 Aug Summer 10416
## 9 Sep Fall 10080
## 10 Oct Fall 10416
## 11 Nov Fall 10080
## 12 Dec Winter 10416
An integrated set of all-city test and train data is created:
# Bind all the city data frames into one tibble, tagging each row with its
# source city in column "src"
allCity <- list("NYC"=nycTemp,
                "LA"=laxTemp,
                "Chicago"=chiTemp,
                "Houston"=houTemp
                ) %>%
  bind_rows(.id="src")
# Create the index for training data: a seeded, reproducible 70/30 split
# (seq_len() is the safe idiom; 1:nrow() misbehaves on zero-row input)
set.seed(24061512)
idxTrain <- sample(seq_len(nrow(allCity)), size = round(0.7*nrow(allCity)), replace=FALSE)
# Add test-train flag to full dataset, plus a factor version of src for
# use as a random-forest response variable
allCity <- allCity %>%
  mutate(tt=ifelse(row_number() %in% idxTrain, "train", "test"),
         fct_src=factor(src))
allCity
## # A tibble: 486,072 × 83
## src time date hour temperature_2m relativehumidity_2m
## <chr> <dttm> <date> <int> <dbl> <int>
## 1 NYC 2010-01-01 00:00:00 2010-01-01 0 -1.1 95
## 2 NYC 2010-01-01 01:00:00 2010-01-01 1 -1 96
## 3 NYC 2010-01-01 02:00:00 2010-01-01 2 -1 96
## 4 NYC 2010-01-01 03:00:00 2010-01-01 3 -0.8 97
## 5 NYC 2010-01-01 04:00:00 2010-01-01 4 -0.9 97
## 6 NYC 2010-01-01 05:00:00 2010-01-01 5 -0.8 97
## 7 NYC 2010-01-01 06:00:00 2010-01-01 6 -0.7 97
## 8 NYC 2010-01-01 07:00:00 2010-01-01 7 -0.5 97
## 9 NYC 2010-01-01 08:00:00 2010-01-01 8 -0.6 97
## 10 NYC 2010-01-01 09:00:00 2010-01-01 9 -0.6 97
## # ℹ 486,062 more rows
## # ℹ 77 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
# Review counts by year
allCity %>%
count(year, src, tt) %>%
pivot_wider(id_cols=c("src", "tt"), names_from="year", values_from="n")
## # A tibble: 8 × 16
## src tt `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018`
## <chr> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 Chicago test 2555 2660 2671 2667 2612 2648 2550 2567 2648
## 2 Chicago train 6205 6100 6113 6093 6148 6112 6234 6193 6112
## 3 Houston test 2666 2562 2671 2621 2695 2639 2595 2688 2631
## 4 Houston train 6094 6198 6113 6139 6065 6121 6189 6072 6129
## 5 LA test 2638 2653 2679 2591 2645 2634 2648 2579 2729
## 6 LA train 6122 6107 6105 6169 6115 6126 6136 6181 6031
## 7 NYC test 2644 2648 2579 2627 2645 2577 2603 2589 2618
## 8 NYC train 6116 6112 6205 6133 6115 6183 6181 6171 6142
## # ℹ 5 more variables: `2019` <int>, `2020` <int>, `2021` <int>, `2022` <int>,
## # `2023` <int>
Distributions of several key variables are explored:
# Variables whose distributions will be compared across the four cities
keyVars <- c("temperature_2m",
             "relativehumidity_2m",
             "dewpoint_2m",
             "shortwave_radiation",
             "vapor_pressure_deficit",
             "soil_temperature_28_to_100cm",
             "soil_temperature_100_to_255cm",
             "soil_moisture_28_to_100cm",
             "soil_moisture_100_to_255cm"
             )
# Long-format boxplots: one facet per metric (free y-scale), one box per city
allCity %>%
  colSelector(vecSelect=c("src", keyVars)) %>%
  pivot_longer(cols=-c(src)) %>%
  ggplot(aes(x=src, y=value)) +
  geom_boxplot(aes(fill=src)) +
  facet_wrap(~name, scales="free_y") +
  labs(x=NULL, y=NULL, title="Distribution of Key Metrics by City") +
  scale_fill_discrete(NULL)
In addition, pair plots by city are created for several combinations of variables:
# Variables to cross in pairwise scatter plots
keyVars <- c('pressure_msl',
             'surface_pressure',
             'soil_temperature_100_to_255cm',
             'soil_moisture_100_to_255cm'
             )
# One scatter plot per unordered pair of keyVars
for(intCtr in seq_len(length(keyVars)-1)) {
  for(intCtr2 in (intCtr+1):length(keyVars)) {
    p1 <- allCity %>%
      # Coarsen the continuous metrics (nearest 0.5, or 0.01 for moisture) so
      # observations stack into countable buckets for the size aesthetic
      mutate(across(c("pressure_msl", "surface_pressure", "soil_temperature_100_to_255cm"),
                    .fns=function(x) round(x*2)/2
                    ),
             soil_moisture_100_to_255cm=round(soil_moisture_100_to_255cm, 2)
             ) %>%
      colSelector(vecSelect=c("src", keyVars[c(intCtr, intCtr2)])) %>%
      group_by(across(c("src", keyVars[c(intCtr, intCtr2)]))) %>%
      # .groups="drop" already returns an ungrouped tibble, so no ungroup() needed
      summarize(n=n(), .groups="drop") %>%
      # .data[[ ]] pronoun is the supported way to map string column names,
      # replacing the fragile get() environment lookup; labs() below still
      # supplies the human-readable axis titles
      ggplot(aes(x=.data[[keyVars[intCtr]]], y=.data[[keyVars[intCtr2]]])) +
      geom_point(aes(color=src, size=n), alpha=0.25) +
      labs(title="Distribution of Key Metrics by City", x=keyVars[intCtr], y=keyVars[intCtr2]) +
      scale_size_continuous("# Obs")
    print(p1)
  }
}
The cities are well differentiated by several combinations, particularly surface pressure vs. MSL pressure.
A full random forest model is run for predicting city using LA, NYC, and Chicago:
# Create set of relevant training variables: every pct_* percentile column has
# a raw counterpart, so stripping the prefix recovers the raw variable names
varsTrain <- allCity %>%
  select(starts_with("pct")) %>%
  names() %>%
  str_remove(pattern="pct_")
varsTrain
## [1] "hour" "temperature_2m"
## [3] "relativehumidity_2m" "dewpoint_2m"
## [5] "apparent_temperature" "pressure_msl"
## [7] "surface_pressure" "precipitation"
## [9] "rain" "snowfall"
## [11] "cloudcover" "cloudcover_low"
## [13] "cloudcover_mid" "cloudcover_high"
## [15] "shortwave_radiation" "direct_radiation"
## [17] "direct_normal_irradiance" "diffuse_radiation"
## [19] "windspeed_10m" "windspeed_100m"
## [21] "winddirection_10m" "winddirection_100m"
## [23] "windgusts_10m" "et0_fao_evapotranspiration"
## [25] "weathercode" "vapor_pressure_deficit"
## [27] "soil_temperature_0_to_7cm" "soil_temperature_7_to_28cm"
## [29] "soil_temperature_28_to_100cm" "soil_temperature_100_to_255cm"
## [31] "soil_moisture_0_to_7cm" "soil_moisture_7_to_28cm"
## [33] "soil_moisture_28_to_100cm" "soil_moisture_100_to_255cm"
## [35] "year" "doy"
# Label reused for accuracy reporting and as the chart subtitle
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Three cities only; Houston is deliberately excluded so it can be scored
# against this model below to find its "most similar" city
keyCities <- c("NYC", "LA", "Chicago")
# Classify city (fct_src) from the weather predictors: train on pre-2022
# training rows, evaluate on the 2022 test rows
rfCity <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022, src %in% keyCities),
yVar="fct_src",
xVars=varsTrain,
dfTest=allCity %>% filter(tt=="test", year==2022, src %in% keyCities),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
returnData=TRUE
)
## Warning: Dropped unused factor level(s) in dependent variable: Houston.
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 100%
Prediction accuracy is 100%, as expected given the significant differentiation. Houston is assessed for the city it is “most similar” to:
# Score ALL four cities' 2022 test rows with the three-city model (including
# Houston, which the model never saw) and plot the confusion matrix to see
# which known city Houston is classified as
predictRF(rfCity$rf, df=allCity %>% filter(tt=="test", year==2022)) %>%
plotConfusion(trueCol="fct_src", useSub=NULL, plotCont=FALSE)
Based on predictors in the three-city random forest, Houston is most similar to NYC. The full random forest model is updated, including Houston:
# Re-run the city classifier with Houston included as a fourth class
keyCities <- c("NYC", "LA", "Chicago", "Houston")
rfCity <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022, src %in% keyCities),
yVar="fct_src",
xVars=varsTrain,
dfTest=allCity %>% filter(tt=="test", year==2022, src %in% keyCities),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
returnData=TRUE
)
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 100%
Even with the similarities between NYC and Houston, there is sufficient differentiation in the predictors to drive 100% accuracy
A model is created to predict temperature for two cities:
# Regression random forest for 2-meter temperature, trained on NYC and Chicago
keyCities <- c("NYC", "Chicago")
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# xVars excludes predictors matching "^temp|ature$": temperature_2m itself and
# apparent_temperature; note the soil_temperature_* columns are NOT excluded
# (they end in "cm") — presumably intentional, as deep soil temperature is a
# slow-moving signal rather than a restatement of the target
rfTemp2m <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022, src %in% keyCities),
yVar="temperature_2m",
xVars=c(varsTrain[!str_detect(varsTrain, "^temp|ature$")]),
dfTest=allCity %>% filter(tt=="test", year==2022, src %in% keyCities),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
rndTo=-1L,
refXY=TRUE,
returnData=TRUE
)
## Growing trees.. Progress: 65%. Estimated remaining time: 16 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.41% (RMSE 0.86 vs. 11.16 null)
## `geom_smooth()` using formula = 'y ~ x'
Temperature predictions on holdout data for NYC and Chicago have R-squared over 99%. The model is applied to data from Houston and LA:
# Temperature predictions for LA
# Transfer test: apply the NYC/Chicago temperature model to LA's 2022 test rows
predTempLA <- predictRF(rfTemp2m$rf, df=allCity %>% filter(tt=="test", year==2022, src=="LA"))
reportAccuracy(predTempLA, trueCol="temperature_2m", reportR2=TRUE, useLabel="LA temperature predictions")
##
## R-squared of LA temperature predictions is: 92.38% (RMSE 1.89 vs. 6.86 null)
# Actual-vs-predicted plot, rounded to 0.5 degrees, with a y=x reference line
plotConfusion(predTempLA, trueCol="temperature_2m", plotCont=TRUE, rndTo=0.5, refXY=TRUE, useSub="LA")
## `geom_smooth()` using formula = 'y ~ x'
# Temperature predictions for Houston
# Same transfer test for Houston's 2022 test rows
predTempHOU <- predictRF(rfTemp2m$rf, df=allCity %>% filter(tt=="test", year==2022, src=="Houston"))
reportAccuracy(predTempHOU, trueCol="temperature_2m", reportR2=TRUE, useLabel="Houston temperature predictions")
##
## R-squared of Houston temperature predictions is: 97.22% (RMSE 1.44 vs. 8.63 null)
plotConfusion(predTempHOU, trueCol="temperature_2m", plotCont=TRUE, rndTo=0.5, refXY=TRUE, useSub="Houston")
## `geom_smooth()` using formula = 'y ~ x'
Predictions for two cities not included in the original model have ~95% R-squared. Houston, being relatively similar to NYC, has a higher R-squared than LA.
Function runFullRF() is updated to allow for using an existing model with new data:
runFullRF <- function(dfTrain,
                      yVar,
                      xVars,
                      useExistingRF=NULL,
                      dfTest=dfTrain,
                      useLabel="test data",
                      useSub=NULL,
                      isContVar=FALSE,
                      rndTo=NULL,
                      rndBucketsAuto=100,
                      nSig=NULL,
                      refXY=FALSE,
                      makePlots=TRUE,
                      plotImp=makePlots,
                      plotConf=makePlots,
                      returnData=FALSE,
                      ...
                      ) {
  # Run the full random-forest workflow: fit (or re-use) a model, predict on
  # test data, report accuracy, and optionally plot importances and confusion.
  #
  # FUNCTION ARGUMENTS:
  # dfTrain: training data (ignored when useExistingRF is supplied)
  # yVar: dependent variable
  # xVars: column(s) containing independent variables
  # useExistingRF: an existing RF model; when non-NULL, fitting and importance
  #   plotting (steps 1-2) are skipped and only steps 3-5 run (default NULL)
  # dfTest: test dataset for applying predictions (default: dfTrain)
  # useLabel: label to be used for reporting accuracy
  # useSub: subtitle to be used for confusion chart (NULL means none)
  # isContVar: boolean, is the variable continuous? (default FALSE = categorical)
  # rndTo: round every prediction to the nearest rndTo
  #   NULL means no rounding (default); -1L means estimate from the data
  # rndBucketsAuto: integer, if rndTo is -1L, approximate number of buckets desired
  # nSig: number of significant digits for the auto-calculated rounding
  #   parameter (NULL means calculate exactly)
  # refXY: boolean, include a y=x reference line? (continuous targets only)
  # makePlots: master switch for both plot types
  # plotImp: boolean, plot variable importance? (default is makePlots)
  # plotConf: boolean, plot confusion matrix? (default is makePlots)
  # returnData: boolean, return the model and derived objects?
  # ...: additional parameters passed to runSimpleRF(), then to ranger::ranger()
  #
  # Steps 1-2: fit the forest and compute importances, unless a model was passed
  if(!is.null(useExistingRF)) {
    fittedRF <- useExistingRF
    impData <- NA
  }
  else {
    # Step 1. Fit a random forest using impurity importance
    fittedRF <- runSimpleRF(df=dfTrain, yVar=yVar, xVars=xVars, importance="impurity", ...)
    # Step 2. Extract, and optionally plot, variable importance
    impData <- plotRFImportance(fittedRF, plotData=plotImp, returnData=TRUE)
  }
  # Step 3. Score the test dataset with the (new or supplied) model
  predData <- predictRF(rf=fittedRF, df=dfTest)
  # Step 4. Report accuracy: R-squared/RMSE when continuous, hit rate otherwise
  accData <- reportAccuracy(predData,
                            trueCol=yVar,
                            rndReport=3,
                            useLabel=useLabel,
                            reportR2=isTRUE(isContVar),
                            returnAcc=TRUE
                            )
  # Step 5. Plot confusion data (continuous or categorical form) if requested
  if(isTRUE(plotConf)) {
    plotConfusion(predData,
                  trueCol=yVar,
                  useSub=useSub,
                  plotCont=isTRUE(isContVar),
                  rndTo=rndTo,
                  rndBucketsAuto=rndBucketsAuto,
                  nSig=nSig,
                  refXY=refXY
                  )
  }
  # Step 6. Return the model and derived objects if requested
  if(isTRUE(returnData)) {
    return(list(rf=fittedRF, rfImp=impData, tstPred=predData, rfAcc=accData))
  }
}
Updated function runFullRF() is tested on LA and Houston:
# Temperature predictions for LA
# Exercise the updated function: re-use the fitted NYC/Chicago model via
# useExistingRF, so no training occurs — only predict, report, and plot
runFullRF(yVar="temperature_2m",
useExistingRF=rfTemp2m$rf,
dfTest=allCity %>% filter(tt=="test", year==2022, src=="LA"),
useLabel="LA temperature predictions",
useSub="LA",
isContVar=TRUE,
rndTo=0.5,
refXY=TRUE
)
##
## R-squared of LA temperature predictions is: 92.382% (RMSE 1.89 vs. 6.86 null)
## `geom_smooth()` using formula = 'y ~ x'
# Temperature predictions for Houston
# Same re-use of the existing model, scored against Houston's 2022 test rows
runFullRF(yVar="temperature_2m",
useExistingRF=rfTemp2m$rf,
dfTest=allCity %>% filter(tt=="test", year==2022, src=="Houston"),
useLabel="Houston temperature predictions",
useSub="Houston",
isContVar=TRUE,
rndTo=0.5,
refXY=TRUE
)
##
## R-squared of Houston temperature predictions is: 97.223% (RMSE 1.44 vs. 8.63 null)
## `geom_smooth()` using formula = 'y ~ x'
A basic linear model can potentially drive better temperature predictions:
keyCities <- c("NYC", "Chicago")
# Linear model of temperature on relative humidity, dewpoint, and their
# interaction ("+1" keeps the default intercept explicit), fit on the pre-2022
# training rows for NYC and Chicago only
lmMiniTemp <- allCity %>%
filter(tt=="train", year<2022, src %in% keyCities) %>%
select(t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m) %>%
lm(t~rh+d+rh:d+1, data=.)
summary(lmMiniTemp)
##
## Call:
## lm(formula = t ~ rh + d + rh:d + 1, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8377 -0.4461 -0.1708 0.2944 12.0201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.158e+01 7.965e-03 2709.77 <2e-16 ***
## rh -2.300e-01 1.150e-04 -1999.27 <2e-16 ***
## d 1.087e+00 6.448e-04 1685.07 <2e-16 ***
## rh:d -5.407e-04 9.068e-06 -59.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6296 on 147464 degrees of freedom
## Multiple R-squared: 0.9966, Adjusted R-squared: 0.9966
## F-statistic: 1.428e+07 on 3 and 147464 DF, p-value: < 2.2e-16
# Score the NYC/Chicago 2022 holdout rows with the linear model, then compare
# predictions to actuals within 5-degree actual-temperature buckets
ggMiniTemp <- predict(lmMiniTemp,
newdata=allCity %>%
filter(tt=="test", year==2022, src %in% keyCities) %>%
select(rh=relativehumidity_2m, d=dewpoint_2m)
) %>%
# Because "." appears as a top-level argument (pred=.), magrittr does NOT also
# insert it as the first argument: mutate()'s data is the one-column select(...)
# frame and the prediction vector is attached as pred. NOTE(review): this
# relies on both filter() calls returning rows in the same order — confirm
mutate(select(allCity %>% filter(tt=="test", year==2022, src %in% keyCities), temperature_2m),
pred=.,
err=pred-temperature_2m,
err2=err**2,
rnd5=round(temperature_2m/5)*5
) %>%
# Average actuals, predictions, and errors within each 5-degree bucket
group_by(rnd5) %>%
summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniTemp
## # A tibble: 13 × 6
## rnd5 n temperature_2m pred err err2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -25 2 -23.3 -23.3 -0.0227 0.00534
## 2 -20 15 -19.4 -19.1 0.270 0.187
## 3 -15 60 -14.5 -14.3 0.238 0.189
## 4 -10 201 -9.76 -9.54 0.221 0.263
## 5 -5 377 -4.52 -4.35 0.177 0.245
## 6 0 648 0.202 0.184 -0.0177 0.267
## 7 5 730 4.95 4.97 0.0224 0.248
## 8 10 719 10.2 10.1 -0.0591 0.302
## 9 15 692 14.9 14.9 -0.0380 0.433
## 10 20 920 20.1 20.2 0.0703 0.244
## 11 25 654 24.7 24.6 -0.0569 1.12
## 12 30 254 29.4 28.4 -0.984 3.56
## 13 35 38 34.2 31.2 -2.99 12.9
# Plot bucketed actual vs. predicted means against the bucket midpoint,
# with a dashed y=x reference line drawn over the series
metricLabels <- c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")
ggMiniTemp %>%
  select(rnd5, temperature_2m, pred) %>%
  pivot_longer(cols=-c(rnd5)) %>%
  ggplot(aes(x=rnd5, y=value)) +
  geom_line(aes(group=name, color=metricLabels[name])) +
  labs(title="Actual vs. Predicted Temperature Using City Linear Model on Same City Holdout Data",
       x="New city actual temperature (rounded to nearest 5)",
       y="Average temperature for metric"
       ) +
  scale_color_discrete("Metric") +
  geom_abline(slope=1, intercept=0, lty=2)
Predictions can then be explored in cities not included in the original linear model, starting with Houston:
# Score Houston's 2022 holdout rows with the NYC/Chicago linear model and
# summarize accuracy within 5-degree actual-temperature buckets
ggMiniTemp_hou <- predict(lmMiniTemp,
newdata=allCity %>%
filter(tt=="test", year==2022, src %in% c("Houston")) %>%
select(rh=relativehumidity_2m, d=dewpoint_2m)
) %>%
# "." appears as a top-level argument (pred=.), so magrittr does not insert it
# as mutate()'s first argument; the select(...) frame is the data and the
# prediction vector is attached as pred. NOTE(review): assumes both filter()
# calls preserve the same row order — confirm
mutate(select(allCity %>% filter(tt=="test", year==2022, src %in% c("Houston")), temperature_2m),
pred=.,
err=pred-temperature_2m,
err2=err**2,
rnd5=round(temperature_2m/5)*5
) %>%
group_by(rnd5) %>%
summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniTemp_hou
## # A tibble: 11 × 6
## rnd5 n temperature_2m pred err err2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -10 2 -7.95 -8.68 -0.730 0.600
## 2 -5 14 -4.25 -4.40 -0.150 0.428
## 3 0 38 0.429 0.608 0.179 0.222
## 4 5 197 5.21 5.29 0.0779 0.245
## 5 10 304 9.94 9.92 -0.0131 0.313
## 6 15 291 15.1 14.8 -0.292 0.700
## 7 20 507 20.3 20.0 -0.294 0.935
## 8 25 744 25.1 25.0 -0.0138 0.657
## 9 30 429 29.6 29.6 0.0214 1.12
## 10 35 145 34.3 33.0 -1.30 2.92
## 11 40 4 38.4 35.1 -3.34 11.3
# Overall MSE/RMSE across buckets, weighting each bucket by its row count
# (weighted.mean(err2, n) is sum(n*err2)/sum(n))
ggMiniTemp_hou %>%
  summarize(mse=weighted.mean(err2, n)) %>%
  mutate(rmse=sqrt(mse))
## # A tibble: 1 × 2
## mse rmse
## <dbl> <dbl>
## 1 0.850 0.922
# Houston version of the bucketed actual-vs-predicted comparison plot
metricLabels <- c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")
ggMiniTemp_hou %>%
  select(rnd5, temperature_2m, pred) %>%
  pivot_longer(cols=-c(rnd5)) %>%
  ggplot(aes(x=rnd5, y=value)) +
  geom_line(aes(group=name, color=metricLabels[name])) +
  labs(title="Actual vs. Predicted Temperature Using City Linear Model on New City (Houston) Holdout Data",
       x="New city (Houston) actual temperature (rounded to nearest 5)",
       y="Average temperature for metric"
       ) +
  scale_color_discrete("Metric") +
  geom_abline(slope=1, intercept=0, lty=2)
The linear model is generally very accurate for Houston, with the exception of under-predicting the very highest temperatures. RMSE of temperature predictions is lowered to ~1 from ~1.5 observed using the random forest
Predictions are also explored in Los Angeles:
# Score LA's 2022 holdout rows with the NYC/Chicago linear model and summarize
# accuracy within 5-degree actual-temperature buckets
ggMiniTemp_lax <- predict(lmMiniTemp,
newdata=allCity %>%
filter(tt=="test", year==2022, src %in% c("LA")) %>%
select(rh=relativehumidity_2m, d=dewpoint_2m)
) %>%
# "." appears as a top-level argument (pred=.), so magrittr does not insert it
# as mutate()'s first argument; the select(...) frame is the data and the
# prediction vector is attached as pred. NOTE(review): assumes both filter()
# calls preserve the same row order — confirm
mutate(select(allCity %>% filter(tt=="test", year==2022, src %in% c("LA")), temperature_2m),
pred=.,
err=pred-temperature_2m,
err2=err**2,
rnd5=round(temperature_2m/5)*5
) %>%
group_by(rnd5) %>%
summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniTemp_lax
## # A tibble: 10 × 6
## rnd5 n temperature_2m pred err err2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 6 1.1 0.935 -0.165 0.205
## 2 5 127 5.72 5.52 -0.201 1.10
## 3 10 605 10.2 9.20 -1.01 5.02
## 4 15 754 15.1 13.9 -1.21 7.89
## 5 20 585 19.7 17.5 -2.15 20.7
## 6 25 331 24.7 22.1 -2.62 28.3
## 7 30 176 29.7 24.2 -5.52 55.0
## 8 35 49 34.4 25.9 -8.47 94.9
## 9 40 7 38.9 28.6 -10.2 124.
## 10 45 1 42.7 23.8 -18.9 356.
# Overall MSE/RMSE across buckets, weighting each bucket by its row count
# (weighted.mean(err2, n) is sum(n*err2)/sum(n))
ggMiniTemp_lax %>%
  summarize(mse=weighted.mean(err2, n)) %>%
  mutate(rmse=sqrt(mse))
## # A tibble: 1 × 2
## mse rmse
## <dbl> <dbl>
## 1 17.5 4.18
# LA version of the bucketed actual-vs-predicted comparison plot
metricLabels <- c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")
ggMiniTemp_lax %>%
  select(rnd5, temperature_2m, pred) %>%
  pivot_longer(cols=-c(rnd5)) %>%
  ggplot(aes(x=rnd5, y=value)) +
  geom_line(aes(group=name, color=metricLabels[name])) +
  labs(title="Actual vs. Predicted Temperature Using City Linear Model on New City (LA) Holdout Data",
       x="New city (LA) actual temperature (rounded to nearest 5)",
       y="Average temperature for metric"
       ) +
  scale_color_discrete("Metric") +
  geom_abline(slope=1, intercept=0, lty=2)
The linear model is generally inaccurate for LA, consistently underestimating temperatures. RMSE of temperature predictions is raised to ~4 from ~2 observed using the random forest
Los Angeles is meaningfully different from NYC and Chicago on key predictors:
# Round the three metrics to whole units and tally each unique combination,
# so the plots below can size points by observation count
tmpPlotData <- allCity %>%
  select(src, relativehumidity_2m, dewpoint_2m, temperature_2m) %>%
  mutate(across(where(is.numeric), .fns=round)) %>%
  count(src, relativehumidity_2m, dewpoint_2m, temperature_2m)
# Temperature vs. dewpoint, count-weighted points with per-city linear fits
pltTD <- tmpPlotData %>%
  count(src, temperature_2m, dewpoint_2m, wt=n) %>%
  ggplot(aes(x=temperature_2m, y=dewpoint_2m)) +
  geom_point(aes(color=src, size=n), alpha=0.2) +
  geom_smooth(aes(color=src, weight=n), method="lm") +
  labs(title="T/D by city")
pltTD
## `geom_smooth()` using formula = 'y ~ x'
# Temperature vs. relative humidity, count-weighted points with per-city fits
pltTRH <- tmpPlotData %>%
  count(src, temperature_2m, relativehumidity_2m, wt=n) %>%
  ggplot(aes(x=temperature_2m, y=relativehumidity_2m)) +
  geom_point(aes(color=src, size=n), alpha=0.1) +
  geom_smooth(aes(color=src, weight=n), method="lm") +
  labs(title="T/RH by city")
pltTRH
## `geom_smooth()` using formula = 'y ~ x'
Los Angeles is routinely hot and arid, while the other cities tend to be humid when they are hot. Data for an additional low-humidity city are downloaded, cached to avoid multiple hits to the server:
# Hourly data download for Las Vegas, NV
# Build the archive-API URL requesting every hourly metric in tblMetricsHourly
# for 2010-2023; tz matches the LA download so hours align across cities
testURLHourly <- helperOpenMeteoURL(cityName="Las Vegas NV",
hourlyIndices=1:nrow(tblMetricsHourly),
startDate="2010-01-01",
endDate="2023-12-31",
tz="America/Los_Angeles"
)
##
## Hourly metrics created from indices: temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
# Inspect the constructed URL
testURLHourly
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=36.21&longitude=-115.22&start_date=2010-01-01&end_date=2023-12-31&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm&timezone=America%2FLos_Angeles"
# Download the hourly file only when no cached copy exists, to avoid
# re-hitting the Open-Meteo server
if(file.exists("testOM_hourly_las.json")) {
  cat("\nFile testOM_hourly_las.json already exists, skipping download\n")
} else {
  fileDownload(fileName="testOM_hourly_las.json", url=testURLHourly)
}
##
## File testOM_hourly_las.json already exists, skipping download
# Daily data download for Las Vegas, NV
# Build the archive-API URL requesting every daily metric in tblMetricsDaily
# for the same 2010-2023 window and timezone as the hourly pull
testURLDaily <- helperOpenMeteoURL(cityName="Las Vegas NV",
dailyIndices=1:nrow(tblMetricsDaily),
startDate="2010-01-01",
endDate="2023-12-31",
tz="America/Los_Angeles"
)
##
## Daily metrics created from indices: weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration
# Inspect the constructed URL
testURLDaily
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=36.21&longitude=-115.22&start_date=2010-01-01&end_date=2023-12-31&daily=weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration&timezone=America%2FLos_Angeles"
# Download the daily file only when no cached copy exists, to avoid
# re-hitting the Open-Meteo server
if(file.exists("testOM_daily_las.json")) {
  cat("\nFile testOM_daily_las.json already exists, skipping download\n")
} else {
  fileDownload(fileName="testOM_daily_las.json", url=testURLDaily)
}
##
## File testOM_daily_las.json already exists, skipping download
The daily and hourly datasets are loaded:
# Read daily JSON file; per the printout below, the returned list carries
# tblDaily/tblHourly/tblUnits/tblDescription, with tblHourly NULL for a daily pull
lasOMDaily <- formatOpenMeteoJSON("testOM_daily_las.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily
##
## $tblDaily
## # A tibble: 5,113 × 18
## date time weathercode temperature_2m_max temperature_2m_min
## <date> <chr> <int> <dbl> <dbl>
## 1 2010-01-01 2010-01-01 2 10.3 -1.3
## 2 2010-01-02 2010-01-02 0 14.2 -0.4
## 3 2010-01-03 2010-01-03 0 14.2 0.7
## 4 2010-01-04 2010-01-04 1 13.3 2.8
## 5 2010-01-05 2010-01-05 1 13.6 0.7
## 6 2010-01-06 2010-01-06 1 15.8 2.5
## 7 2010-01-07 2010-01-07 2 16.1 6
## 8 2010-01-08 2010-01-08 1 11.2 1.2
## 9 2010-01-09 2010-01-09 1 13.2 0.5
## 10 2010-01-10 2010-01-10 2 15.6 5.9
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## # apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## # snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## # windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## # winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## # et0_fao_evapotranspiration <dbl>
##
## $tblHourly
## NULL
##
## $tblUnits
## # A tibble: 17 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 daily_units time "iso8601" <NA>
## 2 daily_units weathercode "wmo code" The most severe weather co…
## 3 daily_units temperature_2m_max "deg C" Maximum and minimum daily …
## 4 daily_units temperature_2m_min "deg C" Maximum and minimum daily …
## 5 daily_units apparent_temperature_max "deg C" Maximum and minimum daily …
## 6 daily_units apparent_temperature_min "deg C" Maximum and minimum daily …
## 7 daily_units precipitation_sum "mm" Sum of daily precipitation…
## 8 daily_units rain_sum "mm" Sum of daily rain
## 9 daily_units snowfall_sum "cm" Sum of daily snowfall
## 10 daily_units precipitation_hours "h" The number of hours with r…
## 11 daily_units sunrise "iso8601" Sun rise and set times
## 12 daily_units sunset "iso8601" Sun rise and set times
## 13 daily_units windspeed_10m_max "km/h" Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max "km/h" Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg " Dominant wind direction
## 16 daily_units shortwave_radiation_sum "MJ/m²" The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm" Daily sum of ET0 Reference…
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 36.2 -115. 69.8 -25200 America/Los_Angeles
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 36.23901
## longitude: -115.1625
## generationtime_ms: 69.77499
## utc_offset_seconds: -25200
## timezone: America/Los_Angeles
## timezone_abbreviation: PDT
## elevation: 686
# Read hourly JSON file; addVars=TRUE presumably appends the derived columns
# (year, month, tod, pct_* percentiles, etc.) used by the all-city analysis —
# the glimpse output below shows 80 columns vs. 37 in the raw tblHourly
lasTemp <- formatOpenMeteoJSON("testOM_hourly_las.json", addVars=TRUE)
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly
##
## $tblDaily
## NULL
##
## $tblHourly
## # A tibble: 122,712 × 37
## time date hour temperature_2m relativehumidity_2m
## <dttm> <date> <int> <dbl> <int>
## 1 2010-01-01 00:00:00 2010-01-01 0 1.3 53
## 2 2010-01-01 01:00:00 2010-01-01 1 0.5 56
## 3 2010-01-01 02:00:00 2010-01-01 2 0.1 56
## 4 2010-01-01 03:00:00 2010-01-01 3 -0.3 57
## 5 2010-01-01 04:00:00 2010-01-01 4 -0.8 59
## 6 2010-01-01 05:00:00 2010-01-01 5 -1.1 60
## 7 2010-01-01 06:00:00 2010-01-01 6 -1.3 60
## 8 2010-01-01 07:00:00 2010-01-01 7 -1.2 58
## 9 2010-01-01 08:00:00 2010-01-01 8 -1.2 56
## 10 2010-01-01 09:00:00 2010-01-01 9 -0.1 56
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
##
## $tblUnits
## # A tibble: 34 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above …
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters abov…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters …
## 5 hourly_units apparent_temperature deg C Apparent temperature is the percei…
## 6 hourly_units pressure_msl hPa Atmospheric air pressure reduced t…
## 7 hourly_units surface_pressure hPa Atmospheric air pressure reduced t…
## 8 hourly_units precipitation mm Total precipitation (rain, showers…
## 9 hourly_units rain mm Only liquid precipitation of the p…
## 10 hourly_units snowfall cm Snowfall amount of the preceding h…
## # ℹ 24 more rows
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 36.2 -115. 7256. -25200 America/Los_Angeles
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 36.23901
## longitude: -115.1625
## generationtime_ms: 7256.367
## utc_offset_seconds: -25200
## timezone: America/Los_Angeles
## timezone_abbreviation: PDT
## elevation: 686
##
## Rows: 122,712
## Columns: 80
## $ time <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m <dbl> 1.3, 0.5, 0.1, -0.3, -0.8, -1.1, -1.…
## $ relativehumidity_2m <int> 53, 56, 56, 57, 59, 60, 60, 58, 56, …
## $ dewpoint_2m <dbl> -7.2, -7.3, -7.6, -7.7, -7.8, -7.9, …
## $ apparent_temperature <dbl> -2.5, -3.3, -3.6, -4.1, -4.3, -4.7, …
## $ pressure_msl <dbl> 1031.2, 1031.1, 1030.8, 1031.7, 1031…
## $ surface_pressure <dbl> 947.4, 947.1, 946.7, 947.4, 946.9, 9…
## $ precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover <int> 12, 12, 12, 12, 12, 9, 11, 6, 3, 19,…
## $ cloudcover_low <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover_mid <int> 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 28,…
## $ cloudcover_high <int> 40, 40, 40, 39, 40, 29, 32, 19, 10, …
## $ shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 240, …
## $ direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 185, …
## $ direct_normal_irradiance <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 55, 6…
## $ windspeed_10m <dbl> 5.0, 5.5, 4.7, 4.9, 3.1, 3.5, 3.4, 3…
## $ windspeed_100m <dbl> 5.7, 7.2, 6.9, 6.5, 6.3, 6.0, 6.9, 6…
## $ winddirection_10m <int> 291, 293, 293, 287, 291, 294, 302, 2…
## $ winddirection_100m <int> 342, 342, 351, 354, 24, 17, 6, 6, 35…
## $ windgusts_10m <dbl> 9.7, 10.1, 10.1, 9.7, 9.0, 9.0, 9.0,…
## $ et0_fao_evapotranspiration <dbl> 0.01, 0.01, 0.01, 0.01, 0.00, 0.00, …
## $ weathercode <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ vapor_pressure_deficit <dbl> 0.31, 0.28, 0.27, 0.26, 0.24, 0.23, …
## $ soil_temperature_0_to_7cm <dbl> 0.0, -0.3, -0.6, -0.8, -1.0, -1.1, -…
## $ soil_temperature_7_to_28cm <dbl> 5.2, 5.1, 5.0, 4.9, 4.7, 4.6, 4.5, 4…
## $ soil_temperature_28_to_100cm <dbl> 10.2, 10.2, 10.2, 10.2, 10.2, 10.2, …
## $ soil_temperature_100_to_255cm <dbl> 21.3, 21.3, 21.3, 21.3, 21.3, 21.3, …
## $ soil_moisture_0_to_7cm <dbl> 0.069, 0.069, 0.069, 0.069, 0.069, 0…
## $ soil_moisture_7_to_28cm <dbl> 0.126, 0.126, 0.126, 0.126, 0.126, 0…
## $ soil_moisture_28_to_100cm <dbl> 0.142, 0.142, 0.142, 0.142, 0.142, 0…
## $ soil_moisture_100_to_255cm <dbl> 0.12, 0.12, 0.12, 0.12, 0.12, 0.12, …
## $ origTime <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod <fct> Night, Night, Night, Night, Night, N…
## $ doy <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m <dbl> 2, 1, 1, 1, 1, 1, 0, 0, 0, 1, 4, 10,…
## $ pct_relativehumidity_2m <dbl> 87, 88, 88, 89, 90, 91, 91, 90, 88, …
## $ pct_dewpoint_2m <dbl> 23, 22, 21, 21, 20, 20, 19, 18, 17, …
## $ pct_apparent_temperature <dbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 10,…
## $ pct_pressure_msl <dbl> 99, 99, 99, 99, 99, 99, 99, 99, 99, …
## $ pct_surface_pressure <dbl> 98, 98, 98, 98, 98, 98, 98, 98, 98, …
## $ pct_precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover <dbl> 65, 65, 65, 65, 65, 62, 64, 59, 54, …
## $ pct_cloudcover_low <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover_mid <dbl> 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 70, 8…
## $ pct_cloudcover_high <dbl> 75, 75, 75, 74, 75, 71, 72, 68, 64, …
## $ pct_shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 64, 7…
## $ pct_direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 66, 7…
## $ pct_direct_normal_irradiance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 74, 8…
## $ pct_diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 59, 6…
## $ pct_windspeed_10m <dbl> 27, 33, 24, 27, 10, 14, 13, 11, 19, …
## $ pct_windspeed_100m <dbl> 24, 33, 31, 29, 28, 26, 31, 31, 27, …
## $ pct_winddirection_10m <dbl> 75, 76, 76, 74, 75, 76, 79, 77, 74, …
## $ pct_winddirection_100m <dbl> 94, 94, 96, 97, 7, 5, 1, 1, 96, 93, …
## $ pct_windgusts_10m <dbl> 15, 17, 17, 15, 12, 12, 12, 12, 9, 1…
## $ pct_et0_fao_evapotranspiration <dbl> 5, 5, 5, 5, 0, 0, 0, 0, 0, 10, 37, 5…
## $ pct_weathercode <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72,…
## $ pct_vapor_pressure_deficit <dbl> 5, 4, 4, 4, 3, 3, 3, 3, 3, 4, 7, 19,…
## $ pct_soil_temperature_0_to_7cm <dbl> 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 4, …
## $ pct_soil_temperature_7_to_28cm <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, …
## $ pct_soil_temperature_28_to_100cm <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ pct_soil_temperature_100_to_255cm <dbl> 44, 44, 44, 44, 44, 44, 44, 44, 44, …
## $ pct_soil_moisture_0_to_7cm <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ pct_soil_moisture_7_to_28cm <dbl> 75, 75, 75, 75, 75, 75, 75, 75, 75, …
## $ pct_soil_moisture_28_to_100cm <dbl> 64, 64, 64, 64, 64, 64, 64, 64, 64, …
## $ pct_soil_moisture_100_to_255cm <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, …
## $ pct_year <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## # A tibble: 8 × 4
## todSeason season tod n
## <fct> <fct> <fct> <int>
## 1 Spring-Day Spring Day 15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day Summer Day 15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day Fall Day 15288
## 6 Fall-Night Fall Night 15288
## 7 Winter-Day Winter Day 15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
## hour fct_hour tod n
## <int> <fct> <fct> <int>
## 1 0 0 Night 5113
## 2 1 1 Night 5113
## 3 2 2 Night 5113
## 4 3 3 Night 5113
## 5 4 4 Night 5113
## 6 5 5 Night 5113
## 7 6 6 Night 5113
## 8 7 7 Day 5113
## 9 8 8 Day 5113
## 10 9 9 Day 5113
## 11 10 10 Day 5113
## 12 11 11 Day 5113
## 13 12 12 Day 5113
## 14 13 13 Day 5113
## 15 14 14 Day 5113
## 16 15 15 Day 5113
## 17 16 16 Day 5113
## 18 17 17 Day 5113
## 19 18 18 Day 5113
## 20 19 19 Night 5113
## 21 20 20 Night 5113
## 22 21 21 Night 5113
## 23 22 22 Night 5113
## 24 23 23 Night 5113
## # A tibble: 12 × 3
## month season n
## <fct> <fct> <int>
## 1 Jan Winter 10416
## 2 Feb Winter 9480
## 3 Mar Spring 10416
## 4 Apr Spring 10080
## 5 May Spring 10416
## 6 Jun Summer 10080
## 7 Jul Summer 10416
## 8 Aug Summer 10416
## 9 Sep Fall 10080
## 10 Oct Fall 10416
## 11 Nov Fall 10080
## 12 Dec Winter 10416
An integrated set of all-city test and train data is updated:
# Bind all the data frames
# Stack the five per-city hourly tables; .id="src" records the source city
# as a character column on every row
allCity <- list("NYC"=nycTemp,
                "LA"=laxTemp,
                "Chicago"=chiTemp,
                "Houston"=houTemp,
                "Vegas"=lasTemp
) %>%
  bind_rows(.id="src")
# Create the index for training data
# Fixed seed so the 70/30 split is reproducible across renders.
# seq_len() replaces the 1:nrow() idiom (safe for the degenerate 0-row case;
# identical sampling behavior otherwise).
set.seed(24070113)
idxTrain_v2 <- sample(seq_len(nrow(allCity)), size = round(0.7*nrow(allCity)), replace=FALSE)
# Add test-train flag to full dataset
# tt marks each row "train"/"test"; fct_src is the 5-level city factor used
# as the classification target downstream
allCity <- allCity %>%
  mutate(tt=ifelse(row_number() %in% idxTrain_v2, "train", "test"),
         fct_src=factor(src))
allCity
## # A tibble: 608,784 × 83
## src time date hour temperature_2m relativehumidity_2m
## <chr> <dttm> <date> <int> <dbl> <int>
## 1 NYC 2010-01-01 00:00:00 2010-01-01 0 -1.1 95
## 2 NYC 2010-01-01 01:00:00 2010-01-01 1 -1 96
## 3 NYC 2010-01-01 02:00:00 2010-01-01 2 -1 96
## 4 NYC 2010-01-01 03:00:00 2010-01-01 3 -0.8 97
## 5 NYC 2010-01-01 04:00:00 2010-01-01 4 -0.9 97
## 6 NYC 2010-01-01 05:00:00 2010-01-01 5 -0.8 97
## 7 NYC 2010-01-01 06:00:00 2010-01-01 6 -0.7 97
## 8 NYC 2010-01-01 07:00:00 2010-01-01 7 -0.5 97
## 9 NYC 2010-01-01 08:00:00 2010-01-01 8 -0.6 97
## 10 NYC 2010-01-01 09:00:00 2010-01-01 9 -0.6 97
## # ℹ 608,774 more rows
## # ℹ 77 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
# Review counts by year
# One row per (city, split); one column per calendar year of observations
allCity %>%
  count(year, src, tt) %>%
  pivot_wider(id_cols = c(src, tt), names_from = year, values_from = n)
## # A tibble: 10 × 16
## src tt `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018`
## <chr> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 Chicago test 2569 2593 2572 2660 2623 2591 2583 2679 2692
## 2 Chicago train 6191 6167 6212 6100 6137 6169 6201 6081 6068
## 3 Houston test 2687 2539 2612 2665 2675 2607 2652 2686 2662
## 4 Houston train 6073 6221 6172 6095 6085 6153 6132 6074 6098
## 5 LA test 2565 2607 2588 2674 2627 2641 2685 2650 2655
## 6 LA train 6195 6153 6196 6086 6133 6119 6099 6110 6105
## 7 NYC test 2633 2602 2622 2623 2672 2583 2603 2607 2670
## 8 NYC train 6127 6158 6162 6137 6088 6177 6181 6153 6090
## 9 Vegas test 2582 2528 2642 2619 2633 2587 2650 2679 2618
## 10 Vegas train 6178 6232 6142 6141 6127 6173 6134 6081 6142
## # ℹ 5 more variables: `2019` <int>, `2020` <int>, `2021` <int>, `2022` <int>,
## # `2023` <int>
Distributions of several key variables are explored:
# Key hourly metrics chosen for cross-city comparison
keyVars <- c("temperature_2m",
             "relativehumidity_2m",
             "dewpoint_2m",
             "shortwave_radiation",
             "vapor_pressure_deficit",
             "soil_temperature_28_to_100cm",
             "soil_temperature_100_to_255cm",
             "soil_moisture_28_to_100cm",
             "soil_moisture_100_to_255cm")
# One boxplot panel per metric, cities side by side within each panel
allCity %>%
  colSelector(vecSelect = c("src", keyVars)) %>%
  pivot_longer(cols = -src) %>%
  ggplot(aes(x = src, y = value)) +
  geom_boxplot(aes(fill = src)) +
  facet_wrap(~name, scales = "free_y") +
  scale_fill_discrete(NULL) +
  labs(x = NULL, y = NULL, title = "Distribution of Key Metrics by City")
Las Vegas stands out for especially low relative humidity (even relative to LA), as well as dry soil (similar to LA).
The scatter of temperature and dewpoint is also explored:
# Hourly temperature vs. dewpoint, rounded to whole degrees; point size and
# smoother weight reflect how often each rounded (t, d) pair occurs per city
allCity %>%
  select(t = temperature_2m, d = dewpoint_2m, src) %>%
  mutate(across(where(is.numeric), round)) %>%
  count(src, t, d) %>%
  ggplot(aes(x = t, y = d)) +
  geom_point(aes(size = n, color = src), alpha = 0.5) +
  geom_smooth(aes(color = src, weight = n), method = "lm") +
  labs(x = "Temperature (C)", y = "Dewpoint (C)", title = "Temperature vs. Dewpoint", subtitle = "Hourly") +
  scale_size_continuous("# Obs") +
  scale_color_discrete(NULL)
## `geom_smooth()` using formula = 'y ~ x'
# Per-city Pearson correlation between temperature and dewpoint
allCity %>%
  group_by(src) %>%
  summarise(cor_td = cor(temperature_2m, dewpoint_2m)) %>%
  ungroup()
## # A tibble: 5 × 2
## src cor_td
## <chr> <dbl>
## 1 Chicago 0.950
## 2 Houston 0.834
## 3 LA 0.273
## 4 NYC 0.919
## 5 Vegas 0.371
Las Vegas is similar to LA, with lower dewpoints. The more humid cities have 80%+ correlation between temperature and dewpoint, dropping to roughly 30% correlation in the drier cities (0.27 for LA, 0.37 for Vegas).
Models for predicting city (one with soil temperature, one without) are saved using data without Las Vegas, for application to the new Las Vegas data:
# Run with all variables
# Random forest classifying city (fct_src) from the full varsTrain predictor
# set, trained on pre-2022 training rows with Las Vegas excluded so the model
# can later be applied to Vegas as an unseen city.
# NOTE(review): factor(src) is recomputed BEFORE filtering out Vegas, so
# fct_src keeps all 5 city levels; ranger then warns about the dropped unused
# "Vegas" level (see output below). The mutate also appears redundant with the
# fct_src already added to allCity earlier — confirm runFullRF relies only on
# the column, not how it was built.
rfCityFull <- runFullRF(allCity %>%
mutate(fct_src=factor(src)) %>%
filter(year<2022, tt=="train", src!="Vegas"),
yVar="fct_src",
xVars=varsTrain,
dfTest=allCity %>%
mutate(fct_src=factor(src)) %>%
filter(year==2022, tt=="test", src!="Vegas"),
isContVar=FALSE,
returnData=TRUE
)
## Warning: Dropped unused factor level(s) in dependent variable: Vegas.
## Growing trees.. Progress: 97%. Estimated remaining time: 0 seconds.
##
## Accuracy of test data is: 100%
# Score the full-variable city model on every Las Vegas row and tally the
# predicted city labels
allCity %>%
  filter(src == "Vegas") %>%
  predictRF(rfCityFull$rf, df = .) %>%
  count(pred)
## # A tibble: 1 × 2
## pred n
## <fct> <int>
## 1 LA 122712
# Run without moisture variables
# Same city classifier as rfCityFull, but with every predictor whose name
# contains "moist" (the soil_moisture_* columns and their pct_ twins) removed
# from varsTrain, to see how the model reassigns Vegas without soil moisture.
# Same caveat as above: factor(src) is built before the Vegas filter, keeping
# the 5th level and triggering ranger's dropped-level warning.
rfCityNoMoisture <- runFullRF(allCity %>%
mutate(fct_src=factor(src)) %>%
filter(year<2022, tt=="train", src!="Vegas"),
yVar="fct_src",
xVars=varsTrain[!grepl(pattern="moist", x=varsTrain)],
dfTest=allCity %>%
mutate(fct_src=factor(src)) %>%
filter(year==2022, tt=="test", src!="Vegas"),
isContVar=FALSE,
returnData=TRUE
)
## Warning: Dropped unused factor level(s) in dependent variable: Vegas.
## Growing trees.. Progress: 65%. Estimated remaining time: 16 seconds.
##
## Accuracy of test data is: 98.725%
# Tally predicted city labels for the held-out Las Vegas rows using the
# no-moisture model.
# FIX: this previously scored houTemp (Houston's own source data), which only
# re-confirmed Houston on a training city instead of testing how the model
# classifies the unseen Las Vegas data, which is what the surrounding
# narrative describes (parallel to the rfCityFull check above).
predictRF(rfCityNoMoisture$rf, df=allCity %>% filter(src=="Vegas")) %>% count(pred)
## # A tibble: 1 × 2
## pred n
## <fct> <int>
## 1 Houston 122712
The previously trained random forest models overwhelmingly predict Las Vegas as Los Angeles (if soil moisture is included) or Houston (if soil moisture is excluded)
The linear approximation for estimating temperature based on dewpoint and relative humidity is applied:
# Apply the previously fit linear approximation lmMiniTemp (temperature from
# relative humidity rh and dewpoint d, per its newdata names) to the 2022
# Las Vegas test rows, then summarize prediction error within 5-degree
# actual-temperature bands.
ggMiniTempLAS <- predict(lmMiniTemp,
newdata=allCity %>%
filter(src=="Vegas", tt=="test", year==2022) %>%
select(rh=relativehumidity_2m, d=dewpoint_2m)
) %>%
# magrittr subtlety: because "." appears as a top-level named argument
# (pred=.), the piped prediction vector is NOT also inserted as mutate()'s
# first argument — the first argument is the one-column tibble of actual
# temperatures built inline, which must use the identical filter as above
# so rows align positionally with the predictions.
mutate(allCity %>% filter(src=="Vegas", tt=="test", year==2022) %>% select(temperature_2m),
pred=.,
err=pred-temperature_2m,
err2=err**2,
rnd5=round(temperature_2m/5)*5
) %>%
# Per 5-degree band: row count plus means of actual, predicted, error, and
# squared error (across() here summarizes the original data columns)
group_by(rnd5) %>%
summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniTempLAS
## # A tibble: 11 × 6
## rnd5 n temperature_2m pred err err2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -5 1 -2.7 -2.95 -0.252 0.0633
## 2 0 48 1.09 0.469 -0.616 2.42
## 3 5 264 5.52 3.97 -1.55 6.82
## 4 10 406 9.96 6.39 -3.56 23.1
## 5 15 345 14.7 8.61 -6.11 54.3
## 6 20 294 20.1 11.4 -8.65 98.2
## 7 25 370 25.2 16.8 -8.41 110.
## 8 30 407 29.8 20.4 -9.37 142.
## 9 35 274 34.8 22.6 -12.3 202.
## 10 40 119 39.7 23.1 -16.6 298.
## 11 45 9 43.5 22.8 -20.7 430.
# Pool the per-band mean squared errors into an overall MSE, weighting each
# 5-degree band by its observation count, then convert to RMSE
ggMiniTempLAS %>%
  summarize(mse = weighted.mean(err2, n)) %>%
  mutate(rmse = sqrt(mse))
## # A tibble: 1 × 2
## mse rmse
## <dbl> <dbl>
## 1 99.3 9.97
# Mean actual vs. mean predicted temperature within each 5-degree band; the
# dashed 45-degree reference line marks perfect agreement
ggMiniTempLAS %>%
  select(rnd5, temperature_2m, pred) %>%
  pivot_longer(cols = -rnd5) %>%
  ggplot(aes(x = rnd5, y = value)) +
  geom_line(aes(group = name,
                color = ifelse(name == "pred", "Predicted Mean", "Actual Mean")
  )
  ) +
  scale_color_discrete("Metric") +
  labs(title = "Actual vs. Predicted Temperature Using Old City Linear Model on New City Data",
       x = "New city actual temperature (rounded to nearest 5)",
       y = "Average temperature for metric"
  ) +
  geom_abline(slope = 1, intercept = 0, lty = 2)
The linear approximation based on dewpoint and relative humidity is inaccurate for predicting temperatures in Las Vegas, consistent with Las Vegas having T/D trends very different from originally modeled cities, NYC and Chicago
Las Vegas is meaningfully different from NYC and Chicago on key predictors:
# Joint distribution of humidity, dewpoint, and temperature by city,
# rounded to whole units to keep the count table compact
tmpPlotData <- allCity %>%
  select(src, relativehumidity_2m, dewpoint_2m, temperature_2m) %>%
  mutate(across(-src, round)) %>%
  count(src, relativehumidity_2m, dewpoint_2m, temperature_2m)
# Temperature vs. dewpoint by city; wt = n re-aggregates the precomputed
# counts after marginalizing out relative humidity
tmpPlotData %>%
  count(src, temperature_2m, dewpoint_2m, wt = n) %>%
  ggplot(aes(x = temperature_2m, y = dewpoint_2m)) +
  geom_point(aes(color = src, size = n), alpha = 0.2) +
  geom_smooth(aes(color = src, weight = n), method = "lm") +
  labs(title = "T/D by city")
## `geom_smooth()` using formula = 'y ~ x'
# Temperature vs. relative humidity by city; wt = n re-aggregates counts
# after marginalizing out dewpoint
tmpPlotData %>%
  count(src, temperature_2m, relativehumidity_2m, wt = n) %>%
  ggplot(aes(x = temperature_2m, y = relativehumidity_2m)) +
  geom_point(aes(color = src, size = n), alpha = 0.1) +
  geom_smooth(aes(color = src, weight = n), method = "lm") +
  labs(title = "T/RH by city")
## `geom_smooth()` using formula = 'y ~ x'
The existing random forest model, trained on NYC and Chicago, is also tested on Las Vegas temperatures:
# Temperature predictions for Vegas
# Apply the already-trained temperature model (rfTemp2m, fit on other cities)
# to the 2022 Las Vegas test rows via useExistingRF — no refitting here.
# NOTE(review): rndTo=0.5 and refXY=TRUE presumably control prediction
# rounding and the reference x=y line in the diagnostic plot — confirm
# against runFullRF's definition.
runFullRF(yVar="temperature_2m",
useExistingRF=rfTemp2m$rf,
dfTest=allCity %>% filter(tt=="test", year==2022, src=="Vegas"),
useLabel="Las Vegas temperature predictions",
useSub="Las Vegas",
isContVar=TRUE,
rndTo=0.5,
refXY=TRUE
)
##
## R-squared of Las Vegas temperature predictions is: 90.29% (RMSE 3.32 vs. 10.65 null)
## `geom_smooth()` using formula = 'y ~ x'
The random forest is more accurate than the linear model in predicting temperatures in Las Vegas based on training data from other cities. RMSE is ~3 rather than the ~10 from the linear model application
All combinations of two variables are explored for predicting temperature on a smaller training dataset:
# Train and test data
# Non-Vegas rows only: pre-2022 rows form the training pool, 2022 the holdout
dfTrainTemp <- allCity %>%
  filter(!(src %in% c("Vegas")), tt=="train", year<2022) %>%
  mutate(fct_src=factor(src))
dfTestTemp <- allCity %>%
  filter(!(src %in% c("Vegas")), tt=="test", year==2022) %>%
  mutate(fct_src=factor(src))
# Variables to explore
# Drops candidates that start with "temp" or end in "ature" (temperature_2m,
# apparent_temperature, pct_apparent_temperature), then adds month and tod.
# NOTE(review): pct_temperature_2m ends in "_2m", so if varsTrain contains it
# this regex does NOT exclude it and the target would leak into the
# predictors — likely the source of the near-perfect (99%+) R-squared combos
# in the output. Confirm varsTrain's contents.
possTempVars <- c(varsTrain[!str_detect(varsTrain, "^temp|ature$")], "month", "tod")
# Subsets to use
# Fixed seed for a reproducible 5,000-row training subsample
set.seed(24070815)
idxSmallTemp <- sample(seq_len(nrow(dfTrainTemp)), 5000, replace=FALSE)
# Preallocate one result row per variable pair rather than growing the matrix
# with rbind() inside the double loop (same final contents, no O(n^2) copying)
mtxSmallTemp <- matrix(NA_real_, nrow=choose(length(possTempVars), 2), ncol=3)
rowPos <- 0L
for(idx1 in seq_len(length(possTempVars)-1)) {
  for(idx2 in (idx1+1):length(possTempVars)) {
    # Fit a small random forest on just this pair of predictors and keep the
    # holdout R-squared from the returned accuracy element
    r2SmallTemp <- runFullRF(dfTrain=dfTrainTemp[idxSmallTemp,],
                             yVar="temperature_2m",
                             xVars=possTempVars[c(idx1, idx2)],
                             dfTest=dfTestTemp,
                             useLabel=keyLabel,
                             useSub=stringr::str_to_sentence(keyLabel),
                             isContVar=TRUE,
                             makePlots=FALSE,
                             returnData=TRUE
    )[["rfAcc"]][["r2"]]
    rowPos <- rowPos + 1L
    mtxSmallTemp[rowPos, ] <- c(idx1, idx2, r2SmallTemp)
  }
}
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.429% (RMSE 9.9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.568% (RMSE 5.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.904% (RMSE 8.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.761% (RMSE 9.32 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.887% (RMSE 10.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.689% (RMSE 10.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.11% (RMSE 9.92 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.088% (RMSE 9.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.741% (RMSE 9.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.102% (RMSE 9.64 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.91% (RMSE 10.14 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.741% (RMSE 8.9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.008% (RMSE 9.19 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.981% (RMSE 9.65 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.124% (RMSE 9.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.458% (RMSE 10.01 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.166% (RMSE 10.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.926% (RMSE 10.03 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.027% (RMSE 10.03 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.149% (RMSE 10.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.99% (RMSE 7.99 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.581% (RMSE 9.67 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.066% (RMSE 7.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.913% (RMSE 2.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.307% (RMSE 3.56 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.703% (RMSE 5.33 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.437% (RMSE 8.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.273% (RMSE 9.74 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.693% (RMSE 9.72 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.371% (RMSE 9.96 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.175% (RMSE 10.13 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.815% (RMSE 10.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.027% (RMSE 6.98 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.472% (RMSE 6.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.346% (RMSE 10.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.67% (RMSE 0.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.088% (RMSE 9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.674% (RMSE 9.61 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.583% (RMSE 10.27 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.53% (RMSE 10.16 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.545% (RMSE 10.05 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.577% (RMSE 10.11 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.095% (RMSE 10.08 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.496% (RMSE 9.84 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.226% (RMSE 10.34 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.277% (RMSE 9.52 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.385% (RMSE 9.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.7% (RMSE 9.72 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.058% (RMSE 9.64 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.35% (RMSE 10.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.799% (RMSE 10.36 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.856% (RMSE 10.36 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.208% (RMSE 10.34 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.26% (RMSE 10.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.901% (RMSE 8.2 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.266% (RMSE 9.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.932% (RMSE 1.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.996% (RMSE 2.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.519% (RMSE 4.22 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.807% (RMSE 5.72 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.043% (RMSE 8.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.491% (RMSE 10.06 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.247% (RMSE 9.96 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.563% (RMSE 10.21 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.445% (RMSE 10.83 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.944% (RMSE 10.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.033% (RMSE 6.9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.427% (RMSE 6.71 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.112% (RMSE 10.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.703% (RMSE 6.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.493% (RMSE 5.25 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.963% (RMSE 5.89 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.674% (RMSE 6.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.516% (RMSE 6.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.282% (RMSE 5.48 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.68% (RMSE 5.53 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.17% (RMSE 5.78 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.591% (RMSE 6.19 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.957% (RMSE 4.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.145% (RMSE 4.75 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.794% (RMSE 4.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.678% (RMSE 5.34 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.79% (RMSE 6.08 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.682% (RMSE 6.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.451% (RMSE 5.93 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.143% (RMSE 5.96 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.739% (RMSE 6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.916% (RMSE 3.46 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.176% (RMSE 5.49 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.885% (RMSE 0.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.287% (RMSE 2.26 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.311% (RMSE 4.12 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.826% (RMSE 4.9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.575% (RMSE 5.74 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.127% (RMSE 5.19 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.572% (RMSE 5.45 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.311% (RMSE 6.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.894% (RMSE 5.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.081% (RMSE 6.23 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.822% (RMSE 5.71 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.887% (RMSE 5.51 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.518% (RMSE 6.37 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.956% (RMSE 4.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.745% (RMSE 8.96 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.842% (RMSE 9.14 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.151% (RMSE 8.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.112% (RMSE 8.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.85% (RMSE 8.96 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.306% (RMSE 8.68 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.438% (RMSE 9.16 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.883% (RMSE 8.33 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.09% (RMSE 8.31 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.366% (RMSE 8.49 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.024% (RMSE 8.64 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.767% (RMSE 9.14 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.717% (RMSE 9.08 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.207% (RMSE 9.29 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.618% (RMSE 9.33 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.859% (RMSE 9.25 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.139% (RMSE 7.63 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.623% (RMSE 8.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.748% (RMSE 6.84 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.979% (RMSE 2.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.88% (RMSE 4.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.025% (RMSE 5.69 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.042% (RMSE 7.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.374% (RMSE 9.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.767% (RMSE 9.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.334% (RMSE 9.11 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.753% (RMSE 9.26 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.253% (RMSE 9.05 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.238% (RMSE 7.26 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.056% (RMSE 6.97 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.286% (RMSE 8.93 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.817% (RMSE 9.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.3% (RMSE 9.74 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.245% (RMSE 9.58 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.25% (RMSE 9.46 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.46% (RMSE 9.45 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.289% (RMSE 9.29 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.55% (RMSE 9.78 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.526% (RMSE 8.92 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.502% (RMSE 8.92 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.704% (RMSE 9.08 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.106% (RMSE 9.12 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.484% (RMSE 9.84 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.298% (RMSE 9.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.56% (RMSE 9.84 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.027% (RMSE 9.87 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.633% (RMSE 9.89 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.008% (RMSE 8.06 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.242% (RMSE 9.17 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.477% (RMSE 6.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.082% (RMSE 2.53 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.986% (RMSE 4.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.676% (RMSE 5.82 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.736% (RMSE 8.27 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.046% (RMSE 9.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.806% (RMSE 9.26 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.218% (RMSE 9.46 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.571% (RMSE 9.84 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.86% (RMSE 9.65 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.211% (RMSE 6.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.768% (RMSE 6.68 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.445% (RMSE 9.56 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.088% (RMSE 10.24 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.043% (RMSE 10.19 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.587% (RMSE 10.05 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.672% (RMSE 10.21 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.859% (RMSE 9.87 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.948% (RMSE 10.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.057% (RMSE 9.47 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.875% (RMSE 9.48 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.887% (RMSE 9.71 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.131% (RMSE 9.75 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.397% (RMSE 10.28 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.15% (RMSE 10.13 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.72% (RMSE 10.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.77% (RMSE 10.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.215% (RMSE 10.41 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.147% (RMSE 8.69 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.585% (RMSE 10.05 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.717% (RMSE 7.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.485% (RMSE 2.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.222% (RMSE 4.74 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.135% (RMSE 6.05 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.954% (RMSE 8.7 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.324% (RMSE 9.96 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.715% (RMSE 9.99 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.122% (RMSE 10.24 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.423% (RMSE 10.11 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.41% (RMSE 10.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.009% (RMSE 7.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.983% (RMSE 7.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.19% (RMSE 10.23 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.913% (RMSE 10.2 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.521% (RMSE 10.06 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.228% (RMSE 10.18 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.842% (RMSE 9.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.242% (RMSE 10.34 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.786% (RMSE 9.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.899% (RMSE 9.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.622% (RMSE 9.72 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.202% (RMSE 9.8 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.261% (RMSE 10.23 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.787% (RMSE 10.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.425% (RMSE 10.06 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.516% (RMSE 10.06 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.949% (RMSE 10.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.915% (RMSE 8.83 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.8% (RMSE 10.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.522% (RMSE 7.82 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.046% (RMSE 4.41 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.623% (RMSE 5.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.81% (RMSE 6.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.356% (RMSE 8.8 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.174% (RMSE 9.97 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.885% (RMSE 10.04 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.068% (RMSE 10.29 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.175% (RMSE 10.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.351% (RMSE 10.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.321% (RMSE 7.83 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.513% (RMSE 7.95 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.181% (RMSE 10.23 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.207% (RMSE 10.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.276% (RMSE 10.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.164% (RMSE 9.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.696% (RMSE 10.15 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.841% (RMSE 9.43 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.143% (RMSE 9.47 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.715% (RMSE 9.66 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.786% (RMSE 9.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.242% (RMSE 10.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.538% (RMSE 9.95 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.678% (RMSE 9.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.324% (RMSE 9.9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.554% (RMSE 10.16 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.586% (RMSE 8.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.349% (RMSE 10.12 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.067% (RMSE 8.05 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.189% (RMSE 4.63 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.093% (RMSE 5.78 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.81% (RMSE 6.68 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.071% (RMSE 8.76 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.24% (RMSE 9.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.185% (RMSE 9.91 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.289% (RMSE 10.12 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.112% (RMSE 9.97 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.54% (RMSE 10.22 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.685% (RMSE 7.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.754% (RMSE 8.01 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.191% (RMSE 10.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.731% (RMSE 10.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.638% (RMSE 9.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.92% (RMSE 10.14 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.027% (RMSE 9.47 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.557% (RMSE 9.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.768% (RMSE 9.71 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.424% (RMSE 9.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.623% (RMSE 10.37 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.59% (RMSE 10.21 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.906% (RMSE 10.14 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.942% (RMSE 10.14 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.691% (RMSE 10.36 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.532% (RMSE 8.67 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.065% (RMSE 9.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.031% (RMSE 7.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.521% (RMSE 2.65 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.598% (RMSE 4.58 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.12% (RMSE 5.96 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.263% (RMSE 8.69 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.47% (RMSE 9.84 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.16% (RMSE 9.75 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.485% (RMSE 9.95 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.039% (RMSE 10.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.357% (RMSE 10.17 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.638% (RMSE 7.53 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.487% (RMSE 7.32 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.102% (RMSE 9.97 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.189% (RMSE 9.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.59% (RMSE 10.21 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.264% (RMSE 9.52 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.97% (RMSE 9.53 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.181% (RMSE 9.75 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.948% (RMSE 9.65 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.675% (RMSE 10.26 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.074% (RMSE 10.13 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.891% (RMSE 10.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.19% (RMSE 10.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.818% (RMSE 10.36 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.005% (RMSE 8.7 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.852% (RMSE 9.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.358% (RMSE 7.47 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.347% (RMSE 2.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.012% (RMSE 4.65 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.953% (RMSE 6.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.794% (RMSE 8.65 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.705% (RMSE 9.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.2% (RMSE 9.75 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.392% (RMSE 9.95 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.316% (RMSE 10.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.699% (RMSE 10.21 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.662% (RMSE 7.52 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.94% (RMSE 7.43 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.523% (RMSE 10.06 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.773% (RMSE 9.99 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.936% (RMSE 9.19 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.686% (RMSE 9.2 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.661% (RMSE 9.44 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.668% (RMSE 9.38 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.026% (RMSE 9.97 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.733% (RMSE 9.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.329% (RMSE 9.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.428% (RMSE 9.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.042% (RMSE 10.03 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.997% (RMSE 8.45 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.921% (RMSE 9.65 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.934% (RMSE 7.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.474% (RMSE 3.04 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.676% (RMSE 4.69 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.358% (RMSE 5.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.489% (RMSE 8.48 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.825% (RMSE 9.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.482% (RMSE 9.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.046% (RMSE 9.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.669% (RMSE 9.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.469% (RMSE 9.95 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.054% (RMSE 7.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.255% (RMSE 7.34 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.485% (RMSE 9.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.672% (RMSE 9.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.964% (RMSE 9.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.018% (RMSE 9.81 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.516% (RMSE 9.78 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.288% (RMSE 10.39 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.913% (RMSE 10.25 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.019% (RMSE 10.19 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.434% (RMSE 10.17 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.001% (RMSE 10.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.883% (RMSE 8.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.572% (RMSE 10 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.916% (RMSE 7.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.409% (RMSE 3.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.112% (RMSE 4.98 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.798% (RMSE 6.17 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.508% (RMSE 8.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.756% (RMSE 10.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.603% (RMSE 10.11 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.627% (RMSE 10.32 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.687% (RMSE 10.37 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.273% (RMSE 10.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.735% (RMSE 7.73 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.03% (RMSE 7.64 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.086% (RMSE 10.19 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.677% (RMSE 9.72 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.621% (RMSE 9.32 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.785% (RMSE 9.66 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.877% (RMSE 9.48 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.718% (RMSE 9.61 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.868% (RMSE 9.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.49% (RMSE 9.56 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.479% (RMSE 9.56 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.869% (RMSE 7.58 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.992% (RMSE 9.19 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.596% (RMSE 7.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.366% (RMSE 2.68 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.954% (RMSE 3.61 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.429% (RMSE 5.16 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.403% (RMSE 7.69 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.699% (RMSE 9.32 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.297% (RMSE 9.29 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.315% (RMSE 9.51 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.759% (RMSE 9.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.278% (RMSE 9.63 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.03% (RMSE 6.9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.971% (RMSE 6.82 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.862% (RMSE 9.43 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.844% (RMSE 9.2 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.169% (RMSE 9.64 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.689% (RMSE 9.61 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.163% (RMSE 9.64 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.585% (RMSE 9.61 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.932% (RMSE 9.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.104% (RMSE 9.58 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.787% (RMSE 7.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.386% (RMSE 9.22 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.94% (RMSE 7.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.855% (RMSE 2.58 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.061% (RMSE 3.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.203% (RMSE 5.18 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.167% (RMSE 7.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.851% (RMSE 9.37 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.851% (RMSE 9.31 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.322% (RMSE 9.46 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.277% (RMSE 9.69 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.266% (RMSE 9.69 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.637% (RMSE 6.93 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.908% (RMSE 6.83 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.073% (RMSE 9.47 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.719% (RMSE 9.66 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.731% (RMSE 9.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.305% (RMSE 9.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.25% (RMSE 9.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.522% (RMSE 9.78 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.422% (RMSE 9.84 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.267% (RMSE 8.17 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.695% (RMSE 9.44 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.747% (RMSE 7.37 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.935% (RMSE 2.56 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.475% (RMSE 3.68 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.738% (RMSE 5.33 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.936% (RMSE 8.13 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.704% (RMSE 9.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.667% (RMSE 9.44 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.314% (RMSE 9.68 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.882% (RMSE 9.82 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.244% (RMSE 9.8 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.438% (RMSE 7.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.275% (RMSE 6.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.786% (RMSE 9.71 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.431% (RMSE 9.73 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.527% (RMSE 9.78 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.667% (RMSE 9.78 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.917% (RMSE 9.76 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.841% (RMSE 9.82 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.882% (RMSE 8.46 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.762% (RMSE 9.32 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.646% (RMSE 7.67 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.171% (RMSE 2.72 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.649% (RMSE 3.8 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.192% (RMSE 5.39 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.825% (RMSE 7.93 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.773% (RMSE 9.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.767% (RMSE 9.49 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.47% (RMSE 9.73 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.618% (RMSE 9.67 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.914% (RMSE 9.82 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.493% (RMSE 7.17 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.696% (RMSE 7 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.165% (RMSE 9.64 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.291% (RMSE 10.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.954% (RMSE 10.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.332% (RMSE 10.33 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.1% (RMSE 10.34 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.903% (RMSE 8.46 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.123% (RMSE 9.97 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.542% (RMSE 7.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.665% (RMSE 2.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.68% (RMSE 4.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.732% (RMSE 6.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.843% (RMSE 9.08 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.725% (RMSE 10.21 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.505% (RMSE 10.16 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.44% (RMSE 10.38 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.633% (RMSE 10.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.062% (RMSE 10.35 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.063% (RMSE 7.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.964% (RMSE 7.21 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.182% (RMSE 10.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.131% (RMSE 10.24 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.786% (RMSE 10.26 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.983% (RMSE 10.24 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.804% (RMSE 8.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.402% (RMSE 9.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.643% (RMSE 7.74 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.705% (RMSE 2.61 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.142% (RMSE 4.63 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.855% (RMSE 6.08 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.35% (RMSE 8.99 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.672% (RMSE 10.16 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.579% (RMSE 10.16 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.524% (RMSE 10.27 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.096% (RMSE 10.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.885% (RMSE 10.2 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.1% (RMSE 7.56 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.588% (RMSE 7.24 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.393% (RMSE 9.95 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.371% (RMSE 10.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.714% (RMSE 10.49 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.888% (RMSE 8.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.623% (RMSE 9.78 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.561% (RMSE 7.81 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.832% (RMSE 2.58 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.837% (RMSE 4.43 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.57% (RMSE 5.83 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.745% (RMSE 8.72 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.956% (RMSE 10.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.741% (RMSE 10.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.074% (RMSE 10.4 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.185% (RMSE 10.34 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.327% (RMSE 10.17 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.74% (RMSE 7.3 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.287% (RMSE 7.11 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.992% (RMSE 9.92 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.754% (RMSE 10.44 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.618% (RMSE 8.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.681% (RMSE 9.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.891% (RMSE 7.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.861% (RMSE 2.58 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.013% (RMSE 4.41 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.121% (RMSE 5.87 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.831% (RMSE 8.71 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.142% (RMSE 10.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.857% (RMSE 10.04 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.315% (RMSE 10.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.64% (RMSE 10.37 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.713% (RMSE 10.15 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.573% (RMSE 7.39 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.879% (RMSE 7.14 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.715% (RMSE 9.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.923% (RMSE 8.52 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.109% (RMSE 10.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.701% (RMSE 7.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.65% (RMSE 2.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.967% (RMSE 4.42 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.596% (RMSE 5.92 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.377% (RMSE 8.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.408% (RMSE 10.38 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.302% (RMSE 10.23 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.066% (RMSE 10.4 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.751% (RMSE 10.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.995% (RMSE 10.45 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.51% (RMSE 7.61 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.323% (RMSE 7.26 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.276% (RMSE 10.12 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.362% (RMSE 8.43 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.634% (RMSE 7.01 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.115% (RMSE 2.52 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.72% (RMSE 3.33 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.969% (RMSE 4.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.809% (RMSE 7.14 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.763% (RMSE 8.53 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.305% (RMSE 8.43 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.562% (RMSE 8.67 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.478% (RMSE 8.73 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.885% (RMSE 8.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.747% (RMSE 6.09 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.572% (RMSE 6.01 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.948% (RMSE 8.71 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.827% (RMSE 7.29 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.636% (RMSE 2.82 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.757% (RMSE 4.56 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.744% (RMSE 5.91 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.76% (RMSE 8.53 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.877% (RMSE 9.6 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.606% (RMSE 9.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.554% (RMSE 9.67 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.801% (RMSE 9.88 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.244% (RMSE 10.07 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.584% (RMSE 7.31 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.405% (RMSE 7.32 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.579% (RMSE 9.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.146% (RMSE 2.29 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.291% (RMSE 3.4 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.955% (RMSE 4.66 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.074% (RMSE 6.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.112% (RMSE 7.49 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.738% (RMSE 7.3 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.973% (RMSE 7.57 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.898% (RMSE 7.79 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.974% (RMSE 7.92 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.973% (RMSE 5.51 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.739% (RMSE 5.43 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.766% (RMSE 7.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.139% (RMSE 2.52 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.978% (RMSE 2.55 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.809% (RMSE 2.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.028% (RMSE 2.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.726% (RMSE 2.61 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.291% (RMSE 2.69 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.555% (RMSE 2.64 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.044% (RMSE 3.11 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.807% (RMSE 2.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.102% (RMSE 2.73 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.584% (RMSE 4.58 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.924% (RMSE 4.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.24% (RMSE 4.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.578% (RMSE 4.7 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.431% (RMSE 4.72 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.301% (RMSE 4.73 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.716% (RMSE 4.8 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.109% (RMSE 4.75 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.81% (RMSE 4.67 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.973% (RMSE 4.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.944% (RMSE 5.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.843% (RMSE 5.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.566% (RMSE 6.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.709% (RMSE 6.18 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.436% (RMSE 6.46 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.479% (RMSE 6.37 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.558% (RMSE 6.1 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.774% (RMSE 5.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.489% (RMSE 5.36 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.888% (RMSE 6.5 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.533% (RMSE 8.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.923% (RMSE 8.83 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.66% (RMSE 9.15 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.347% (RMSE 9.51 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.933% (RMSE 8.83 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.27% (RMSE 5.77 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.425% (RMSE 5.56 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.27% (RMSE 8.62 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.58% (RMSE 9.03 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.775% (RMSE 9.37 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.102% (RMSE 8.76 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.178% (RMSE 9.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.943% (RMSE 7.13 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.017% (RMSE 6.9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.833% (RMSE 9.82 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.51% (RMSE 9.89 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.762% (RMSE 8.53 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.111% (RMSE 9.86 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.187% (RMSE 7.34 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.482% (RMSE 7.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.489% (RMSE 9.9 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.092% (RMSE 8.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.979% (RMSE 10.66 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.538% (RMSE 8.02 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.519% (RMSE 7.46 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.702% (RMSE 10.15 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.641% (RMSE 10.59 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.967% (RMSE 5.98 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.189% (RMSE 6.05 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.567% (RMSE 9.89 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.239% (RMSE 7.63 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.407% (RMSE 7.54 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.068% (RMSE 10.24 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.415% (RMSE 7.25 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.973% (RMSE 7.85 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.357% (RMSE 7.9 vs. 10.4 null)
Predictive success by metric is explored:
# Convert the small-model R-squared matrix into a labeled tibble:
# idx1/idx2 index into possTempVars, r2 is the holdout R-squared
dfSmallR2Temp <- mtxSmallTemp %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1 = possTempVars[idx1],
         var2 = possTempVars[idx2],
         rn = row_number())
# Show the 20 best-performing two-variable combinations
dfSmallR2Temp %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 630 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 dewpoint_2m vapor_pressure_deficit 0.999
## 2 relativehumidity_2m dewpoint_2m 0.997
## 3 relativehumidity_2m vapor_pressure_deficit 0.989
## 4 dewpoint_2m soil_temperature_0_to_7cm 0.953
## 5 vapor_pressure_deficit soil_temperature_0_to_7cm 0.951
## 6 soil_temperature_0_to_7cm soil_temperature_7_to_28cm 0.941
## 7 et0_fao_evapotranspiration soil_temperature_0_to_7cm 0.941
## 8 surface_pressure soil_temperature_0_to_7cm 0.941
## 9 soil_temperature_0_to_7cm soil_moisture_0_to_7cm 0.940
## 10 relativehumidity_2m soil_temperature_0_to_7cm 0.940
## 11 pressure_msl soil_temperature_0_to_7cm 0.940
## 12 soil_temperature_0_to_7cm soil_temperature_28_to_100cm 0.940
## 13 direct_normal_irradiance soil_temperature_0_to_7cm 0.939
## 14 hour soil_temperature_0_to_7cm 0.939
## 15 winddirection_100m soil_temperature_0_to_7cm 0.939
## 16 direct_radiation soil_temperature_0_to_7cm 0.939
## 17 winddirection_10m soil_temperature_0_to_7cm 0.938
## 18 soil_temperature_0_to_7cm soil_temperature_100_to_255cm 0.938
## 19 soil_temperature_0_to_7cm doy 0.938
## 20 soil_temperature_0_to_7cm soil_moisture_7_to_28cm 0.937
## # ℹ 610 more rows
# Min / mean / max holdout R-squared for each variable across all of the
# two-predictor models it appears in (a variable can appear as var1 or var2)
dfSmallR2Temp %>%
pivot_longer(cols=c(var1, var2)) %>%
group_by(value) %>%
summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>%
# Variables ordered by mean R-squared; coord_flip() for readable labels
ggplot(aes(x=fct_reorder(value, r2_mu))) +
coord_flip() +
geom_point(aes(y=r2_mu)) +
geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) +
lims(y=c(NA, 1)) +
# Dashed red reference at the R-squared ceiling of 1
geom_hline(yintercept=1, lty=2, color="red") +
labs(title="R-squared in every 2-predictor model including self and one other",
subtitle="Predicting temperature",
y="Range of R-squared (min-mean-max)",
x=NULL
)
# Re-rank the pairs with the dominant predictor (topsoil temperature)
# removed from both slots
dfSmallR2Temp %>%
  filter(var1 != "soil_temperature_0_to_7cm",
         var2 != "soil_temperature_0_to_7cm") %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 595 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 dewpoint_2m vapor_pressure_deficit 0.999
## 2 relativehumidity_2m dewpoint_2m 0.997
## 3 relativehumidity_2m vapor_pressure_deficit 0.989
## 4 et0_fao_evapotranspiration soil_temperature_7_to_28cm 0.897
## 5 vapor_pressure_deficit soil_temperature_7_to_28cm 0.893
## 6 dewpoint_2m et0_fao_evapotranspiration 0.889
## 7 hour soil_temperature_7_to_28cm 0.883
## 8 direct_radiation soil_temperature_7_to_28cm 0.881
## 9 shortwave_radiation soil_temperature_7_to_28cm 0.880
## 10 direct_normal_irradiance soil_temperature_7_to_28cm 0.875
## 11 diffuse_radiation soil_temperature_7_to_28cm 0.866
## 12 dewpoint_2m soil_temperature_7_to_28cm 0.843
## 13 relativehumidity_2m soil_temperature_7_to_28cm 0.835
## 14 winddirection_100m soil_temperature_7_to_28cm 0.820
## 15 windgusts_10m soil_temperature_7_to_28cm 0.820
## 16 winddirection_10m soil_temperature_7_to_28cm 0.818
## 17 surface_pressure soil_temperature_7_to_28cm 0.810
## 18 soil_temperature_7_to_28cm month 0.810
## 19 soil_temperature_7_to_28cm soil_temperature_28_to_100cm 0.809
## 20 pressure_msl soil_temperature_7_to_28cm 0.809
## # ℹ 575 more rows
Select combinations are explored using the full training dataset:
# Candidate predictors carried forward from the small-model screen
possLargeVars <- c("dewpoint_2m",
                   "vapor_pressure_deficit",
                   "relativehumidity_2m",
                   "soil_temperature_0_to_7cm"
)
possLargeVars
# Fit a full-data random forest for every unordered pair of candidates.
# The results matrix is preallocated (one row per pair) rather than grown
# with rbind() inside the loop, and seq_len() guards the outer loop bound.
nPairs <- choose(length(possLargeVars), 2)
mtxLarge <- matrix(NA_real_, nrow=nPairs, ncol=3)
pairRow <- 0L
for(idx1 in seq_len(length(possLargeVars)-1)) {
  for(idx2 in (idx1+1):length(possLargeVars)) {
    # Holdout R-squared for the two-predictor model (idx1, idx2);
    # the redundant dfTrainTemp[,] self-subset is dropped
    r2LargeTemp <- runFullRF(dfTrain=dfTrainTemp,
                             yVar="temperature_2m",
                             xVars=possLargeVars[c(idx1, idx2)],
                             dfTest=dfTestTemp,
                             useLabel=keyLabel,
                             useSub=stringr::str_to_sentence(keyLabel),
                             isContVar=TRUE,
                             makePlots=FALSE,
                             returnData=TRUE
    )[["rfAcc"]][["r2"]]
    pairRow <- pairRow + 1L
    mtxLarge[pairRow, ] <- c(idx1, idx2, r2LargeTemp)
  }
}
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.964% (RMSE 0.2 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.902% (RMSE 0.33 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.675% (RMSE 2.16 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.177% (RMSE 0.94 vs. 10.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.59% (RMSE 2.18 vs. 10.4 null)
## Growing trees.. Progress: 91%. Estimated remaining time: 2 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.588% (RMSE 2.42 vs. 10.4 null)
# Assemble the pairwise full-data results into a labeled tibble and
# rank combinations by holdout R-squared
dfLargeR2Temp <- mtxLarge %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1 = possLargeVars[idx1],
         var2 = possLargeVars[idx2],
         rn = row_number())
dfLargeR2Temp %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 6 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 dewpoint_2m vapor_pressure_deficit 1.00
## 2 dewpoint_2m relativehumidity_2m 0.999
## 3 vapor_pressure_deficit relativehumidity_2m 0.992
## 4 dewpoint_2m soil_temperature_0_to_7cm 0.957
## 5 vapor_pressure_deficit soil_temperature_0_to_7cm 0.956
## 6 relativehumidity_2m soil_temperature_0_to_7cm 0.946
A model using only dewpoint and vapor pressure deficit is run on one city, then applied to the other:
# Train and test data: train on NYC only (pre-2022), test on all cities (2022)
dfTrainTemp_v2 <- allCity %>%
filter(src %in% c("NYC"), tt=="train", year<2022) %>%
mutate(fct_src=factor(src))
dfTestTemp_v2 <- allCity %>%
filter(tt=="test", year==2022) %>%
mutate(fct_src=factor(src))
# Random forest for temperature using dewpoint and vapor pressure deficit,
# then per-city holdout accuracy from the returned test predictions
keyLabel <- "predictions based on NYC pre-2022 training data applied to each city in 2022 holdout dataset"
tmpPred_v2 <- runFullRF(dfTrain=dfTrainTemp_v2,
yVar="temperature_2m",
xVars=c("dewpoint_2m", "vapor_pressure_deficit"),
dfTest=dfTestTemp_v2,
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
makePlots=FALSE,
returnData=TRUE
# tstPred holds the test rows with a pred column (see select below)
)[["tstPred"]] %>%
select(src, temperature_2m, pred) %>%
group_by(src) %>%
# tss/rss: total and residual sums of squares per city;
# berr is the null-model RMSE baseline (sd around the city mean)
summarize(n=n(),
tss=sum((temperature_2m-mean(temperature_2m))**2),
rss=sum((temperature_2m-pred)**2),
r2=1-rss/tss,
rmse=sqrt(rss/n),
berr=sqrt(tss/n)
)
##
## R-squared of predictions based on NYC pre-2022 training data applied to each city in 2022 holdout dataset is: 94.65% (RMSE 2.47 vs. 10.69 null)
tmpPred_v2
## # A tibble: 5 × 7
## src n tss rss r2 rmse berr
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chicago 2592 356174. 305. 0.999 0.343 11.7
## 2 Houston 2659 194789. 400. 0.998 0.388 8.56
## 3 LA 2677 127962. 6718. 0.947 1.58 6.91
## 4 NYC 2664 280171. 104. 1.00 0.197 10.3
## 5 Vegas 2537 287670. 72697. 0.747 5.35 10.6
The model trained on NYC performs well on Chicago, Houston, and LA, while missing significantly on Las Vegas
Patterns in dewpoint and vapor pressure deficit are explored:
# Half-degree-binned counts of VPD and dewpoint in the 2022 test data
dfPlot_v2 <- dfTestTemp_v2 %>%
  select(src, vapor_pressure_deficit, dewpoint_2m) %>%
  mutate(across(where(is.numeric), .fns=function(x) round(2*x)/2)) %>%
  count(src, vapor_pressure_deficit, dewpoint_2m)
dfPlot_v2 %>%
  ggplot(aes(y=vapor_pressure_deficit, x=dewpoint_2m)) +
  geom_point(aes(color=src, size=n), alpha=0.25) + facet_wrap(~src) +
  scale_color_discrete(NULL)
# Overlap of NYC points by city: half-degree bins with 10+ NYC training obs
tmpNYC <- dfTrainTemp_v2 %>%
  select(src, vapor_pressure_deficit, dewpoint_2m) %>%
  mutate(across(where(is.numeric), .fns=function(x) round(2*x)/2)) %>%
  count(src, vapor_pressure_deficit, dewpoint_2m) %>%
  filter(src=="NYC", n>=10) %>%
  mutate(inNYC=TRUE)
# Flag each test bin once (this join/flag pipeline was previously duplicated
# before the plot and before the summary); coalesce() replaces the
# ifelse(is.na(...), FALSE, ...) NA-to-FALSE idiom
dfPlotNYC_v2 <- dfPlot_v2 %>%
  left_join(select(tmpNYC, vapor_pressure_deficit, dewpoint_2m, inNYC),
            by=c("vapor_pressure_deficit", "dewpoint_2m")
  ) %>%
  mutate(inNYC=coalesce(inNYC, FALSE))
dfPlotNYC_v2 %>%
  ggplot(aes(y=vapor_pressure_deficit, x=dewpoint_2m)) +
  geom_point(aes(color=inNYC, size=n), alpha=0.25) + facet_wrap(~src) +
  scale_color_discrete("NYC training\nhas 10+ obs")
# Share of each city's test hours falling in well-covered NYC training bins
dfPlotNYC_v2 %>%
  group_by(src) %>%
  summarize(meanNYC=sum(n*inNYC)/sum(n), n=sum(n), nObs=n())
## # A tibble: 5 × 4
## src meanNYC n nObs
## <chr> <dbl> <int> <int>
## 1 Chicago 0.988 2592 335
## 2 Houston 0.936 2659 371
## 3 LA 0.802 2677 490
## 4 NYC 0.990 2664 361
## 5 Vegas 0.355 2537 747
Chicago and NYC are both very well-represented by the training data, while a majority of Las Vegas observations are largely or entirely absent from the training data
There are strong relationships among dewpoint, vapor pressure deficit, relative humidity, and temperature:
# Rounded T/D/RH slices of the test data, restricted to four dewpoint bands
dfDewSlice <- dfTestTemp_v2 %>%
  select(src, vapor_pressure_deficit, dewpoint_2m, temperature_2m, relativehumidity_2m) %>%
  mutate(across(c(dewpoint_2m, temperature_2m, relativehumidity_2m), .fns=round)) %>%
  filter(dewpoint_2m %in% c(-10, 0, 10, 20))
# Temperature against vapor pressure deficit, colored by dewpoint band
dfDewSlice %>%
  ggplot(aes(x=vapor_pressure_deficit, y=temperature_2m)) +
  geom_point(aes(color=factor(dewpoint_2m))) +
  scale_color_discrete("Dewpoint")
# Temperature against relative humidity, colored by dewpoint band
dfDewSlice %>%
  ggplot(aes(x=relativehumidity_2m, y=temperature_2m)) +
  geom_point(aes(color=factor(dewpoint_2m))) +
  scale_color_discrete("Dewpoint")
To better cover the predictor space, a model using only dewpoint and vapor pressure deficit is run on NYC and Vegas, then applied to the others:
# Train and test data: train on NYC plus Vegas (pre-2022) to widen the
# predictor space; test on all cities (2022)
dfTrainTemp_v3 <- allCity %>%
filter(src %in% c("NYC", "Vegas"), tt=="train", year<2022) %>%
mutate(fct_src=factor(src))
dfTestTemp_v3 <- allCity %>%
filter(tt=="test", year==2022) %>%
mutate(fct_src=factor(src))
# Random forest for temperature using dewpoint and vapor pressure deficit,
# summarized into per-city holdout accuracy
keyLabel <- "predictions based on NYC/Vegas pre-2022 training data applied to each city in 2022 holdout dataset"
tmpPred_v3 <- runFullRF(dfTrain=dfTrainTemp_v3,
yVar="temperature_2m",
xVars=c("dewpoint_2m", "vapor_pressure_deficit"),
dfTest=dfTestTemp_v3,
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
makePlots=FALSE,
returnData=TRUE
)[["tstPred"]] %>%
select(src, temperature_2m, pred) %>%
group_by(src) %>%
# berr is the null-model RMSE baseline (sd around each city's mean)
summarize(n=n(),
tss=sum((temperature_2m-mean(temperature_2m))**2),
rss=sum((temperature_2m-pred)**2),
r2=1-rss/tss,
rmse=sqrt(rss/n),
berr=sqrt(tss/n)
)
##
## R-squared of predictions based on NYC/Vegas pre-2022 training data applied to each city in 2022 holdout dataset is: 99.959% (RMSE 0.22 vs. 10.69 null)
tmpPred_v3
## # A tibble: 5 × 7
## src n tss rss r2 rmse berr
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chicago 2592 356174. 213. 0.999 0.287 11.7
## 2 Houston 2659 194789. 258. 0.999 0.312 8.56
## 3 LA 2677 127962. 34.5 1.00 0.113 6.91
## 4 NYC 2664 280171. 66.4 1.00 0.158 10.3
## 5 Vegas 2537 287670. 43.8 1.00 0.131 10.6
The model trained on NYC and Vegas generally performs very well on all cities
Coverage of the temperature and humidity space by city is explored:
# Density contours of the NYC/Vegas (training-city) space overlaid on
# binned points from all five cities
dfTestTemp_v2 %>%
  select(src, vapor_pressure_deficit, dewpoint_2m, temperature_2m, relativehumidity_2m) %>%
  mutate(across(c(dewpoint_2m, temperature_2m, relativehumidity_2m), .fns=function(x) round(x))) %>%
  ggplot(aes(x=dewpoint_2m, y=temperature_2m)) +
  geom_density2d(data=~filter(., src %in% c("NYC", "Vegas"))) +
  geom_point(data=~count(., src, temperature_2m, dewpoint_2m),
             aes(color=src, size=n),
             alpha=0.25
  ) +
  scale_color_discrete(NULL) +
  # Title typo fixed: "depoint" -> "dewpoint"
  labs(title="Relationships between temperature and dewpoint",
       subtitle="Contours from geom_density_2d() use only NYC and Las Vegas data"
  )
Modeling using NYC and Las Vegas data may not fully cover the coldest and driest portions of the Chicago space
The model using only NYC and Las Vegas is applied to Chicago, with accuracy explored by temperature:
# Train and test data — NOTE: these frames and keyLabel are rebuilt
# identically to the previous NYC/Vegas section so this chunk stands alone
dfTrainTemp_v3 <- allCity %>%
filter(src %in% c("NYC", "Vegas"), tt=="train", year<2022) %>%
mutate(fct_src=factor(src))
dfTestTemp_v3 <- allCity %>%
filter(tt=="test", year==2022) %>%
mutate(fct_src=factor(src))
# Random forest for temperature using dewpoint and vapor pressure deficit;
# here the full test-prediction frame is kept for row-level error analysis
keyLabel <- "predictions based on NYC/Vegas pre-2022 training data applied to each city in 2022 holdout dataset"
tmpPred_v3_df <- runFullRF(dfTrain=dfTrainTemp_v3,
yVar="temperature_2m",
xVars=c("dewpoint_2m", "vapor_pressure_deficit"),
dfTest=dfTestTemp_v3,
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
makePlots=FALSE,
returnData=TRUE
)[["tstPred"]]
##
## R-squared of predictions based on NYC/Vegas pre-2022 training data applied to each city in 2022 holdout dataset is: 99.959% (RMSE 0.22 vs. 10.69 null)
# Per-city accuracy of the NYC/Vegas model on the 2022 holdout
tmpPred_v3_df %>%
  select(src, temperature_2m, pred) %>%
  group_by(src) %>%
  summarize(n = n(),
            tss = sum((temperature_2m - mean(temperature_2m))^2), # total SS
            rss = sum((temperature_2m - pred)^2),                 # residual SS
            r2 = 1 - rss / tss,
            rmse = sqrt(rss / n),
            berr = sqrt(tss / n)                                  # null-model RMSE
  )
## # A tibble: 5 × 7
## src n tss rss r2 rmse berr
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chicago 2592 356174. 242. 0.999 0.305 11.7
## 2 Houston 2659 194789. 228. 0.999 0.293 8.56
## 3 LA 2677 127962. 34.6 1.00 0.114 6.91
## 4 NYC 2664 280171. 64.1 1.00 0.155 10.3
## 5 Vegas 2537 287670. 42.3 1.00 0.129 10.6
# Chicago-only error profile, binned by actual temperature rounded to the
# nearest 5 degrees C
ggMiniTempCHI <- tmpPred_v3_df %>%
select(src, temperature_2m, pred) %>%
filter(src=="Chicago") %>%
mutate(err=pred-temperature_2m,
err2=err**2,
rnd5=round(temperature_2m/5)*5
) %>%
group_by(rnd5) %>%
# n = observations per bin; across() then averages the numeric columns
# (temperature, prediction, error, squared error) within each bin
summarize(n=n(), across(.cols=where(is.numeric), .fns=mean)) %>%
# pcterr2: each bin's share of the total count-weighted squared error
mutate(pcterr2=n*err2/sum(n*err2))
ggMiniTempCHI
## # A tibble: 13 × 7
## rnd5 n temperature_2m pred err err2 pcterr2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -25 2 -24.0 -16.4 7.70 59.4 0.491
## 2 -20 19 -19.0 -17.6 1.34 2.54 0.200
## 3 -15 43 -14.3 -14.0 0.333 0.164 0.0291
## 4 -10 129 -9.94 -9.72 0.223 0.0970 0.0518
## 5 -5 247 -4.80 -4.61 0.188 0.0720 0.0736
## 6 0 321 0.192 0.269 0.0766 0.0239 0.0317
## 7 5 356 4.69 4.69 0.00503 0.0119 0.0175
## 8 10 284 9.82 9.79 -0.0294 0.0253 0.0298
## 9 15 308 14.9 14.9 -0.00369 0.00893 0.0114
## 10 20 480 20.2 20.1 -0.0391 0.0106 0.0210
## 11 25 303 24.6 24.6 -0.0730 0.0236 0.0295
## 12 30 90 29.1 29.0 -0.0939 0.0260 0.00969
## 13 35 10 34.7 34.4 -0.270 0.105 0.00435
# Recombine bin-level errors into overall MSE/RMSE (matches the raw RMSE)
ggMiniTempCHI %>%
  summarize(mse = sum(n * err2) / sum(n),
            rmse = sqrt(mse))
## # A tibble: 1 × 2
## mse rmse
## <dbl> <dbl>
## 1 0.0933 0.305
# Mean actual vs. mean predicted temperature per 5-degree bin; the named
# vector indexed by `name` recodes column names into legend labels
ggMiniTempCHI %>%
select(rnd5, temperature_2m, pred) %>%
pivot_longer(cols=-c(rnd5)) %>%
ggplot(aes(x=rnd5, y=value)) +
geom_line(aes(group=name,
color=c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")[name]
)
) +
labs(title="Actual vs. Predicted Temperature Using NYC/Vegas Random Forest Model on Chicago Data",
x="Chicago actual temperature (rounded to nearest 5)",
y="Average temperature for metric"
) +
scale_color_discrete("Metric") +
# Dashed y=x line: bins on the line are predicted without bias
geom_abline(slope=1, intercept=0, lty=2)
As expected, predictions are excellent in the space covered by the training data and poor for the small number of very cold observations never seen in training. Around 60% of MSE in Chicago temperature predictions occurs in the 23 test data observations where temperature (rounded to nearest 5 degrees C) is -20C or colder
The model using only NYC and Las Vegas is applied to Houston, with accuracy explored by temperature:
# Houston-only error profile, binned by actual temperature (nearest 5 C)
houErr <- tmpPred_v3_df %>%
  select(src, temperature_2m, pred) %>%
  filter(src == "Houston") %>%
  mutate(err = pred - temperature_2m,
         err2 = err^2,
         rnd5 = round(temperature_2m / 5) * 5
  )
# Per-bin counts and means, plus each bin's share of total squared error
ggMiniTempHOU <- houErr %>%
  group_by(rnd5) %>%
  summarize(n=n(), across(.cols=where(is.numeric), .fns=mean)) %>%
  mutate(pcterr2 = n * err2 / sum(n * err2))
ggMiniTempHOU
## # A tibble: 11 × 7
## rnd5 n temperature_2m pred err err2 pcterr2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -10 2 -8.15 -7.88 0.270 0.103 0.000904
## 2 -5 12 -3.8 -3.57 0.233 0.0711 0.00374
## 3 0 44 0.448 0.497 0.0490 0.0190 0.00366
## 4 5 179 5.44 5.43 -0.00674 0.00870 0.00682
## 5 10 304 10.0 10.0 -0.0150 0.0183 0.0243
## 6 15 279 15.1 15.1 -0.0164 0.00935 0.0114
## 7 20 495 20.3 20.2 -0.0439 0.0108 0.0234
## 8 25 755 25.0 24.8 -0.279 0.164 0.541
## 9 30 442 29.6 29.4 -0.191 0.0781 0.151
## 10 35 144 34.4 34.0 -0.415 0.310 0.195
## 11 40 3 38.0 36.3 -1.71 2.92 0.0383
# Recombine bin-level errors into overall MSE/RMSE (matches the raw RMSE)
ggMiniTempHOU %>%
  summarize(mse = sum(n * err2) / sum(n),
            rmse = sqrt(mse))
## # A tibble: 1 × 2
## mse rmse
## <dbl> <dbl>
## 1 0.0859 0.293
# Mean actual vs. mean predicted temperature per 5-degree bin; the named
# vector indexed by `name` recodes column names into legend labels
ggMiniTempHOU %>%
select(rnd5, temperature_2m, pred) %>%
pivot_longer(cols=-c(rnd5)) %>%
ggplot(aes(x=rnd5, y=value)) +
geom_line(aes(group=name,
color=c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")[name]
)
) +
labs(title="Actual vs. Predicted Temperature Using NYC/Vegas Random Forest Model on Houston Data",
x="Houston actual temperature (rounded to nearest 5)",
y="Average temperature for metric"
) +
scale_color_discrete("Metric") +
# Dashed y=x line: bins on the line are predicted without bias
geom_abline(slope=1, intercept=0, lty=2)
As expected, predictions are excellent in the space covered by the training data and miss only with the very hottest observations never seen in training
The model using only NYC and Las Vegas is applied to Los Angeles, with accuracy explored by temperature:
# LA-only error profile, binned by actual temperature (nearest 5 C)
laErr <- tmpPred_v3_df %>%
  select(src, temperature_2m, pred) %>%
  filter(src == "LA") %>%
  mutate(err = pred - temperature_2m,
         err2 = err^2,
         rnd5 = round(temperature_2m / 5) * 5
  )
# Per-bin counts and means, plus each bin's share of total squared error
ggMiniTempLA <- laErr %>%
  group_by(rnd5) %>%
  summarize(n=n(), across(.cols=where(is.numeric), .fns=mean)) %>%
  mutate(pcterr2 = n * err2 / sum(n * err2))
ggMiniTempLA
## # A tibble: 9 × 7
## rnd5 n temperature_2m pred err err2 pcterr2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 11 0.991 1.05 0.0590 0.0195 0.00620
## 2 5 127 5.89 5.87 -0.0172 0.0127 0.0467
## 3 10 617 10.2 10.3 0.0386 0.0206 0.368
## 4 15 783 15.0 15.0 -0.0121 0.0107 0.241
## 5 20 578 19.7 19.7 -0.0220 0.00852 0.142
## 6 25 309 24.9 24.9 -0.00717 0.00572 0.0511
## 7 30 199 29.7 29.7 -0.0382 0.00684 0.0394
## 8 35 48 34.6 34.5 -0.0932 0.0224 0.0311
## 9 40 5 39.6 39.2 -0.457 0.512 0.0740
# Recombine bin-level errors into overall MSE/RMSE (matches the raw RMSE)
ggMiniTempLA %>%
  summarize(mse = sum(n * err2) / sum(n),
            rmse = sqrt(mse))
## # A tibble: 1 × 2
## mse rmse
## <dbl> <dbl>
## 1 0.0129 0.114
# Mean actual vs. mean predicted temperature per 5-degree bin; the named
# vector indexed by `name` recodes column names into legend labels
ggMiniTempLA %>%
select(rnd5, temperature_2m, pred) %>%
pivot_longer(cols=-c(rnd5)) %>%
ggplot(aes(x=rnd5, y=value)) +
geom_line(aes(group=name,
color=c("pred"="Predicted Mean", "temperature_2m"="Actual Mean")[name]
)
) +
labs(title="Actual vs. Predicted Temperature Using NYC/Vegas Random Forest Model on LA Data",
x="Los Angeles actual temperature (rounded to nearest 5)",
y="Average temperature for metric"
) +
scale_color_discrete("Metric") +
# Dashed y=x line: bins on the line are predicted without bias
geom_abline(slope=1, intercept=0, lty=2)
As expected, predictions are excellent since the entire LA space is covered by the training data
An approximate formula for relative humidity is assessed for consistency with the data:
# Approximate formula for relative humidity
# Source https://www.omnicalculator.com/physics/relative-humidity
calcRH <- function(t, d, c1=17.63, c2=243) {
  # Magnus-type approximation: RH (%) from temperature t and dewpoint d
  # (both in degrees C); c1/c2 are the Magnus coefficients and the
  # saturation term is exp(c1*x / (c2+x))
  satTerm <- function(x) exp((c1 * x) / (c2 + x))
  100 * satTerm(d) / satTerm(t)
}
# Applied to sample data: reported RH vs. RH computed from T and dewpoint
dfTestTemp_v3 %>%
select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m) %>%
mutate(crh=calcRH(t, d)) %>%
ggplot(aes(x=rh, y=crh)) +
geom_point(aes(color=src)) +
facet_wrap(~src) +
# Linear fit per facet plus a dashed y=x reference for exact agreement
geom_smooth(method="lm") +
geom_abline(intercept=0, slope=1, lty=2) +
labs(x="Reported relative humidity",
y="Formula relative humidity",
title="Relative humidity by formula from temperature and dewpoint vs. reported in raw data") +
scale_color_discrete(NULL)
## `geom_smooth()` using formula = 'y ~ x'
The formula is an exact match to the reported data, allowing the random forest to find the correct third value when given two of T, D, RH, provided that the training space also includes that combination
Example training data is created for all temperatures and dew points between -30 and 50 (rounded to the nearest 1), with RH calculated based on formula:
# Sample dataset: every integer (t, d) pair in [-30, 50] with d <= t,
# with relative humidity filled in by the Magnus-type formula
rhGrid <- expand.grid(t=seq(-30, 50, by=1), d=seq(-30, 50, by=1))
rhTrain <- rhGrid %>%
  tibble::as_tibble() %>%
  filter(d <= t) %>%
  mutate(rh = calcRH(t, d))
rhTrain
## # A tibble: 3,321 × 3
## t d rh
## <dbl> <dbl> <dbl>
## 1 -30 -30 100
## 2 -29 -30 91.0
## 3 -28 -30 82.9
## 4 -27 -30 75.6
## 5 -26 -30 69.0
## 6 -25 -30 63.0
## 7 -24 -30 57.6
## 8 -23 -30 52.7
## 9 -22 -30 48.3
## 10 -21 -30 44.2
## # ℹ 3,311 more rows
# Training and testing (mtry=1)
# bind_rows(.,.,...,.) stacks 10 copies of the synthetic grid — presumably
# to give the forest enough duplicated rows per leaf; confirm in runFullRF.
# mtry=1 makes each split consider a single randomly-chosen predictor.
rhOut <- rhTrain %>%
bind_rows(.,.,.,.,.,.,.,.,.,.) %>%
runFullRF(dfTrain=.,
yVar=c("t"),
xVars=c("rh", "d"),
isContVar=TRUE,
refXY=TRUE,
mtry=1,
dfTest=allCity %>%
filter(tt=="test") %>%
select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m),
rndTo=1,
returnData=TRUE
)
##
## R-squared of test data is: 98.741% (RMSE 1.17 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'
# Keep only the holdout predictions from the returned list
rhOut <- rhOut[["tstPred"]]
rhOut
## # A tibble: 182,635 × 5
## src t d rh pred
## <chr> <dbl> <dbl> <int> <dbl>
## 1 NYC -1 -1.6 96 1.58
## 2 NYC -0.8 -1.2 97 1.21
## 3 NYC -0.7 -1.1 97 1.21
## 4 NYC -0.6 -1 97 1.21
## 5 NYC 4.8 0.4 73 4.54
## 6 NYC 1.7 -0.4 86 3.84
## 7 NYC -1.8 -6.2 72 -1.91
## 8 NYC -2 -9.9 55 -1.50
## 9 NYC -3.7 -13.1 48 -3.47
## 10 NYC -8.7 -17.4 49 -7.59
## # ℹ 182,625 more rows
# Errors by city: MSE, mean bias, null-model MSE, then RMSE and R-squared
rhOut %>%
  group_by(src) %>%
  summarize(e2 = mean((t - pred)^2),
            mu = mean(t - pred),
            n = n(),
            e2Base = mean((t - mean(t))^2)) %>%
  mutate(rmse = sqrt(e2),
         r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 1.60 -0.462 36557 125. 1.26 0.987
## 2 Houston 1.95 -0.369 36998 60.4 1.40 0.968
## 3 LA 1.22 -0.322 36972 51.9 1.11 0.976
## 4 NYC 1.63 -0.432 35474 102. 1.28 0.984
## 5 Vegas 0.398 0.0241 36634 110. 0.631 0.996
# Errors by RH, binned to the nearest 5 percent
rhOut %>%
mutate(rh5=round(rh/5)*5) %>%
group_by(rh5) %>%
summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
# e2pct: each RH bin's share of the total count-weighted squared error
mutate(rmse=sqrt(e2), r2=1-e2/e2Base, e2pct=n*e2/sum(n*e2)) %>%
print(n=25)
## # A tibble: 21 × 8
## rh5 e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 6.57 -2.40 3 74.4 2.56 0.912 0.0000794
## 2 5 1.06 -0.0228 2151 46.2 1.03 0.977 0.00915
## 3 10 0.462 0.122 6179 61.0 0.680 0.992 0.0115
## 4 15 0.356 0.236 6865 64.8 0.596 0.995 0.00984
## 5 20 0.243 0.113 6235 67.6 0.493 0.996 0.00610
## 6 25 0.190 -0.0796 5763 76.5 0.436 0.998 0.00441
## 7 30 0.193 0.0212 5964 92.0 0.439 0.998 0.00462
## 8 35 0.205 0.116 6306 105. 0.453 0.998 0.00521
## 9 40 0.207 0.146 7230 112. 0.455 0.998 0.00603
## 10 45 0.192 0.0265 8165 116. 0.438 0.998 0.00632
## 11 50 0.233 -0.149 9269 113. 0.483 0.998 0.00870
## 12 55 0.248 -0.0917 9997 114. 0.498 0.998 0.00996
## 13 60 0.263 -0.0551 10919 112. 0.512 0.998 0.0115
## 14 65 0.334 -0.109 11278 115. 0.578 0.997 0.0152
## 15 70 0.454 -0.305 12057 111. 0.674 0.996 0.0220
## 16 75 0.464 -0.303 12812 108. 0.681 0.996 0.0239
## 17 80 0.746 -0.536 13248 102. 0.864 0.993 0.0398
## 18 85 3.09 -1.13 13982 85.8 1.76 0.964 0.174
## 19 90 6.40 -0.484 15304 69.7 2.53 0.908 0.395
## 20 95 3.80 -1.10 14419 52.8 1.95 0.928 0.221
## 21 100 0.897 -0.838 4489 35.9 0.947 0.975 0.0162
Training data rounds temperature to the nearest degree and RH always rounds to the nearest percent, making temperature predictions commonly off by a fraction of a degree. The model is generally accurate, with the exception of very low relative humidities (rounding is much more impactful) and very high relative humidities (mtry=1 creates challenges since grid-based training data overweights some T/D combinations).
The example training data is modified to be more consistent with T/D typically observed:
# Sample of T/D in cities: a fixed-seed 10% bootstrap sample of
# temperature/dewpoint pairs, rounded to integers and tallied.
# seq_len() replaces the 1:nrow() pattern (identical draws under the same
# seed since the candidate vector is unchanged, but safe on empty frames).
set.seed(24072114)
tdAll <- allCity %>%
  select(t=temperature_2m, d=dewpoint_2m) %>%
  slice(sample(seq_len(nrow(.)), round(nrow(.)/10), replace=TRUE)) %>%
  mutate(across(where(is.numeric), .fns=round)) %>%
  count(t, d)
tdAll
## # A tibble: 1,814 × 3
## t d n
## <dbl> <dbl> <int>
## 1 -30 -34 1
## 2 -29 -34 1
## 3 -26 -32 1
## 4 -26 -30 1
## 5 -25 -29 1
## 6 -24 -28 2
## 7 -24 -27 1
## 8 -22 -28 1
## 9 -22 -26 1
## 10 -22 -25 5
## # ℹ 1,804 more rows
# Examples of real-world occurrence: density of sampled T/D combinations
tdAll %>%
ggplot(aes(x=d, y=t)) +
geom_point(aes(size=n), alpha=0.25) +
labs(title="Sample (10%) of 5-city temperature and dew points")
# Training and testing (mtry=1) weighted by real-world occurrence
# Each grid row gets weight n+5: its observed sample count plus a floor of
# 5 so unobserved T/D combinations still contribute. case.weights="n"
# presumably forwards the column to ranger's case.weights — confirm in
# runFullRF.
rhOut_wtd <- rhTrain %>%
left_join(tdAll, by=c("t", "d")) %>%
mutate(n=ifelse(is.na(n), 5, n+5)) %>%
runFullRF(dfTrain=.,
yVar=c("t"),
xVars=c("rh", "d"),
isContVar=TRUE,
refXY=TRUE,
mtry=1,
case.weights="n",
dfTest=allCity %>%
filter(tt=="test") %>%
select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m),
rndTo=1,
returnData=TRUE
)
##
## R-squared of test data is: 98.678% (RMSE 1.2 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'
# Keep only the holdout predictions from the returned list
rhOut_wtd <- rhOut_wtd[["tstPred"]]
rhOut_wtd
## # A tibble: 182,635 × 5
## src t d rh pred
## <chr> <dbl> <dbl> <int> <dbl>
## 1 NYC -1 -1.6 96 1.87
## 2 NYC -0.8 -1.2 97 1.88
## 3 NYC -0.7 -1.1 97 1.88
## 4 NYC -0.6 -1 97 1.88
## 5 NYC 4.8 0.4 73 4.93
## 6 NYC 1.7 -0.4 86 3.19
## 7 NYC -1.8 -6.2 72 -1.47
## 8 NYC -2 -9.9 55 -1.69
## 9 NYC -3.7 -13.1 48 -3.25
## 10 NYC -8.7 -17.4 49 -7.82
## # ℹ 182,625 more rows
# Errors by city: MSE, mean bias, null-model MSE, then RMSE and R-squared
rhOut_wtd %>%
  group_by(src) %>%
  summarize(e2 = mean((t - pred)^2),
            mu = mean(t - pred),
            n = n(),
            e2Base = mean((t - mean(t))^2)) %>%
  mutate(rmse = sqrt(e2),
         r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 1.50 -0.553 36557 125. 1.22 0.988
## 2 Houston 2.23 -0.541 36998 60.4 1.49 0.963
## 3 LA 1.40 -0.467 36972 51.9 1.18 0.973
## 4 NYC 1.69 -0.562 35474 102. 1.30 0.983
## 5 Vegas 0.331 -0.0175 36634 110. 0.575 0.997
# Errors by RH, binned to the nearest 5 percent
rhOut_wtd %>%
mutate(rh5=round(rh/5)*5) %>%
group_by(rh5) %>%
summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
# e2pct: each RH bin's share of the total count-weighted squared error
mutate(rmse=sqrt(e2), r2=1-e2/e2Base, e2pct=n*e2/sum(n*e2)) %>%
print(n=25)
## # A tibble: 21 × 8
## rh5 e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 5.93 -2.24 3 74.4 2.44 0.920 0.0000682
## 2 5 1.04 -0.0480 2151 46.2 1.02 0.978 0.00854
## 3 10 0.454 0.106 6179 61.0 0.674 0.993 0.0107
## 4 15 0.301 0.195 6865 64.8 0.548 0.995 0.00791
## 5 20 0.194 0.0202 6235 67.6 0.440 0.997 0.00463
## 6 25 0.168 -0.0755 5763 76.5 0.410 0.998 0.00371
## 7 30 0.161 -0.0931 5964 92.0 0.401 0.998 0.00368
## 8 35 0.140 -0.00134 6306 105. 0.375 0.999 0.00339
## 9 40 0.143 0.0280 7230 112. 0.378 0.999 0.00396
## 10 45 0.140 -0.0439 8165 116. 0.374 0.999 0.00438
## 11 50 0.158 -0.141 9269 113. 0.397 0.999 0.00561
## 12 55 0.167 -0.109 9997 114. 0.409 0.999 0.00641
## 13 60 0.190 -0.152 10919 112. 0.436 0.998 0.00796
## 14 65 0.246 -0.161 11278 115. 0.496 0.998 0.0107
## 15 70 0.318 -0.315 12057 111. 0.564 0.997 0.0147
## 16 75 0.339 -0.374 12812 108. 0.582 0.997 0.0166
## 17 80 0.555 -0.496 13248 102. 0.745 0.995 0.0282
## 18 85 1.34 -0.739 13982 85.8 1.16 0.984 0.0721
## 19 90 5.06 -0.731 15304 69.7 2.25 0.927 0.297
## 20 95 7.28 -1.84 14419 52.8 2.70 0.862 0.402
## 21 100 5.09 -2.18 4489 35.9 2.26 0.858 0.0876
The weighted training data performs slightly better for data points with high density, at the cost of somewhat worse performance for less commonly observed relative humidities.
Example training data is expanded to all temperatures and dew points between -50 and 50 (rounded to the nearest 1), with RH calculated based on the formula:
# Sample dataset: every integer (t, d) pair in [-50, 50] with dewpoint <= temperature,
# plus relative humidity derived from the known formula
rhTrain_ex <- expand.grid(t = seq(-50, 50, by = 1), d = seq(-50, 50, by = 1)) %>%
  as_tibble() %>%
  filter(d <= t) %>%
  mutate(rh = calcRH(t, d))
rhTrain_ex
## # A tibble: 5,151 × 3
## t d rh
## <dbl> <dbl> <dbl>
## 1 -50 -50 100
## 2 -49 -50 89.2
## 3 -48 -50 79.6
## 4 -47 -50 71.2
## 5 -46 -50 63.7
## 6 -45 -50 57.1
## 7 -44 -50 51.2
## 8 -43 -50 46.0
## 9 -42 -50 41.3
## 10 -41 -50 37.2
## # ℹ 5,141 more rows
# Sample of T/D in cities from previous code section (frame 'tdAll')
# Training and testing (mtry=1) weighted by real-world occurrence:
# each grid row gets a case weight of its observed count plus a floor of 5,
# so (t, d) combinations never seen in the cities still contribute to training
rhOut_wtd_ex <- rhTrain_ex %>%
left_join(tdAll, by=c("t", "d")) %>%
mutate(n=ifelse(is.na(n), 5, n+5)) %>% # weight = observed count + 5 (5 alone if unobserved)
runFullRF(dfTrain=.,
yVar=c("t"), # response: temperature
xVars=c("rh", "d"), # predictors: relative humidity and dewpoint
isContVar=TRUE, # continuous (regression) response
refXY=TRUE,
mtry=1, # one candidate predictor per split
case.weights="n", # weight rows by the 'n' column built above
dfTest=allCity %>%
filter(tt=="test") %>% # held-out test rows only
select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m),
rndTo=1, # NOTE(review): presumably a rounding granularity — confirm in runFullRF (_v001)
returnData=TRUE # keep prediction frames in the returned object
)
##
## R-squared of test data is: 98.789% (RMSE 1.14 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the test-set prediction frame from the model output
rhOut_wtd_ex <- rhOut_wtd_ex[["tstPred"]]
rhOut_wtd_ex
## # A tibble: 182,635 × 5
## src t d rh pred
## <chr> <dbl> <dbl> <int> <dbl>
## 1 NYC -1 -1.6 96 3.16
## 2 NYC -0.8 -1.2 97 3.18
## 3 NYC -0.7 -1.1 97 3.18
## 4 NYC -0.6 -1 97 3.18
## 5 NYC 4.8 0.4 73 4.78
## 6 NYC 1.7 -0.4 86 2.60
## 7 NYC -1.8 -6.2 72 -1.38
## 8 NYC -2 -9.9 55 -1.57
## 9 NYC -3.7 -13.1 48 -3.27
## 10 NYC -8.7 -17.4 49 -7.45
## # ℹ 182,625 more rows
# Per-city error summary: mean squared error, bias, count, baseline variance,
# then derived RMSE and R-squared
rhOut_wtd_ex %>%
  group_by(src) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 1.32 -0.597 36557 125. 1.15 0.989
## 2 Houston 2.28 -0.665 36998 60.4 1.51 0.962
## 3 LA 1.10 -0.399 36972 51.9 1.05 0.979
## 4 NYC 1.53 -0.606 35474 102. 1.24 0.985
## 5 Vegas 0.306 -0.0457 36634 110. 0.553 0.997
# Error summary bucketed by relative humidity (rounded to the nearest 5),
# including each bucket's share of total squared error (e2pct)
rhOut_wtd_ex %>%
  mutate(rh5 = round(rh / 5) * 5) %>%
  group_by(rh5) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(
    rmse  = sqrt(e2),
    r2    = 1 - e2 / e2Base,
    e2pct = n * e2 / sum(n * e2)
  ) %>%
  print(n = 25)
## # A tibble: 21 × 8
## rh5 e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 5.76 -2.24 3 74.4 2.40 0.923 0.0000724
## 2 5 1.01 0.0847 2151 46.2 1.01 0.978 0.00911
## 3 10 0.417 0.0328 6179 61.0 0.646 0.993 0.0108
## 4 15 0.245 -0.0138 6865 64.8 0.495 0.996 0.00704
## 5 20 0.184 0.0197 6235 67.6 0.429 0.997 0.00480
## 6 25 0.171 0.117 5763 76.5 0.413 0.998 0.00412
## 7 30 0.157 0.0106 5964 92.0 0.396 0.998 0.00392
## 8 35 0.139 -0.0553 6306 105. 0.373 0.999 0.00366
## 9 40 0.143 -0.0675 7230 112. 0.378 0.999 0.00433
## 10 45 0.132 -0.00440 8165 116. 0.363 0.999 0.00450
## 11 50 0.157 -0.137 9269 113. 0.396 0.999 0.00609
## 12 55 0.161 -0.109 9997 114. 0.401 0.999 0.00674
## 13 60 0.191 -0.166 10919 112. 0.436 0.998 0.00871
## 14 65 0.261 -0.300 11278 115. 0.511 0.998 0.0123
## 15 70 0.296 -0.302 12057 111. 0.544 0.997 0.0150
## 16 75 0.359 -0.349 12812 108. 0.599 0.997 0.0193
## 17 80 0.451 -0.462 13248 102. 0.672 0.996 0.0250
## 18 85 1.02 -0.690 13982 85.8 1.01 0.988 0.0597
## 19 90 3.03 -0.935 15304 69.7 1.74 0.956 0.194
## 20 95 9.09 -2.20 14419 52.8 3.02 0.828 0.549
## 21 100 2.75 -1.60 4489 35.9 1.66 0.923 0.0517
# Errors by RH (plotted)
# RMSE and R-squared per 5%-RH bucket, shown as stacked facets with free y scales
rhOut_wtd_ex %>%
mutate(rh5=round(rh/5)*5) %>% # bucket RH to the nearest 5
group_by(rh5) %>%
summarize(e2=mean((t-pred)**2), mu=mean(t-pred), n=n(), e2Base=mean((t-mean(t))**2)) %>%
mutate(rmse=sqrt(e2), r2=1-e2/e2Base, e2pct=n*e2/sum(n*e2)) %>%
select(rh5, rmse, r2) %>% # keep only the two metrics to plot
pivot_longer(cols=-c(rh5)) %>% # long form: one row per (rh5, metric)
ggplot(aes(x=rh5, y=value)) +
geom_point(aes(color=name)) +
facet_wrap(~name, ncol=1, scales="free_y") + # one panel per metric
labs(title="R-squared and RMSE of temperature predictions by relative humidity",
x="Reported relative humidity (rounded to nearest 5)",
y=NULL
) +
scale_color_discrete(NULL) # drop the legend title
The expanded training data improves prediction quality at very low temperatures. Predictions continue to be less accurate at very low and very high relative humidities.
Rounding is a meaningful challenge for some temperature predictions given training data that rounds temperature and dewpoint to the nearest 1:
# Distribution of absolute prediction errors (the largest errors concentrate at RH 90+)
rhOut_wtd_ex %>%
  mutate(delta = abs(pred - t)) %>%
  summary()
## src t d rh
## Length:182635 Min. :-31.10 Min. :-35.400 Min. : 2.00
## Class :character 1st Qu.: 9.10 1st Qu.: -1.300 1st Qu.: 42.00
## Mode :character Median : 16.90 Median : 7.200 Median : 65.00
## Mean : 16.22 Mean : 6.615 Mean : 61.05
## 3rd Qu.: 23.90 3rd Qu.: 14.900 3rd Qu.: 83.00
## Max. : 45.80 Max. : 27.200 Max. :100.00
## pred delta
## Min. :-30.404 Min. :0.000001
## 1st Qu.: 9.645 1st Qu.:0.203006
## Median : 17.260 Median :0.441588
## Mean : 16.681 Mean :0.737171
## 3rd Qu.: 24.339 3rd Qu.:0.866279
## Max. : 45.730 Max. :5.065160
# Same summary restricted to large errors (|pred - t| > 1.5)
rhOut_wtd_ex %>%
  mutate(delta = abs(pred - t)) %>%
  filter(delta > 1.5) %>%
  summary()
## src t d rh
## Length:23492 Min. :-22.20 Min. :-27.50 Min. : 2.0
## Class :character 1st Qu.: 9.40 1st Qu.: 7.80 1st Qu.: 90.0
## Mode :character Median : 16.40 Median : 14.80 Median : 94.0
## Mean : 15.23 Mean : 13.41 Mean : 91.5
## 3rd Qu.: 21.70 3rd Qu.: 20.40 3rd Qu.: 96.0
## Max. : 45.10 Max. : 26.80 Max. :100.0
## pred delta
## Min. :-20.59 Min. :1.500
## 1st Qu.: 11.77 1st Qu.:1.767
## Median : 18.45 Median :2.263
## Mean : 17.43 Mean :2.642
## 3rd Qu.: 23.59 3rd Qu.:3.584
## Max. : 45.73 Max. :5.065
# Sample dataset: fine-grained temperatures (0.1 steps) at a few fixed dewpoints,
# keeping both exact and rounded T/RH to study the effect of rounding
rhTrain_hl <- expand.grid(t = seq(-25, 50, by = 0.1), d = seq(-20, 20, by = 10)) %>%
  as_tibble() %>%
  filter(d <= t) %>%
  mutate(rh = calcRH(t, d), rndt = round(t), rndrh = round(rh))
rhTrain_hl
## # A tibble: 2,505 × 5
## t d rh rndt rndrh
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -20 -20 100 -20 100
## 2 -19.9 -20 99.1 -20 99
## 3 -19.8 -20 98.3 -20 98
## 4 -19.7 -20 97.5 -20 97
## 5 -19.6 -20 96.6 -20 97
## 6 -19.5 -20 95.8 -20 96
## 7 -19.4 -20 95.0 -19 95
## 8 -19.3 -20 94.2 -19 94
## 9 -19.2 -20 93.4 -19 93
## 10 -19.1 -20 92.6 -19 93
## # ℹ 2,495 more rows
# Rows where the rounded temperature coincides with the (exact) dewpoint
rhTrain_hl %>%
  filter(rndt == d) %>%
  print(n = 40)
## # A tibble: 30 × 5
## t d rh rndt rndrh
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -20 -20 100 -20 100
## 2 -19.9 -20 99.1 -20 99
## 3 -19.8 -20 98.3 -20 98
## 4 -19.7 -20 97.5 -20 97
## 5 -19.6 -20 96.6 -20 97
## 6 -19.5 -20 95.8 -20 96
## 7 -10 -10 100 -10 100
## 8 -9.9 -10 99.2 -10 99
## 9 -9.8 -10 98.4 -10 98
## 10 -9.7 -10 97.7 -10 98
## 11 -9.6 -10 96.9 -10 97
## 12 -9.5 -10 96.1 -10 96
## 13 0 0 100 0 100
## 14 0.100 0 99.3 0 99
## 15 0.200 0 98.6 0 99
## 16 0.300 0 97.8 0 98
## 17 0.400 0 97.1 0 97
## 18 0.5 0 96.4 0 96
## 19 10 10 100 10 100
## 20 10.1 10 99.3 10 99
## 21 10.2 10 98.7 10 99
## 22 10.3 10 98.0 10 98
## 23 10.4 10 97.4 10 97
## 24 10.5 10 96.7 10 97
## 25 20 20 100 20 100
## 26 20.1 20 99.4 20 99
## 27 20.2 20 98.8 20 99
## 28 20.3 20 98.2 20 98
## 29 20.4 20 97.6 20 98
## 30 20.5 20 97.0 20 97
# Temperature spread within each very-low rounded-RH bucket (rndrh <= 5):
# max/mean/min temperature and row count per (dewpoint, rounded RH) pair
rhTrain_hl %>%
  filter(rndrh <= 5) %>%
  group_by(d, rndrh) %>%
  summarize(
    maxt  = max(t),
    meant = mean(t),
    mint  = min(t),
    n     = n(),
    .groups = "drop"
  )
## # A tibble: 10 × 6
## d rndrh maxt meant mint n
## <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 -20 1 50 46.2 42.4 77
## 2 -20 2 42.3 37.7 33 94
## 3 -20 3 32.9 30.1 27.2 58
## 4 -20 4 27.1 25.1 23 42
## 5 -20 5 22.9 21.3 19.7 33
## 6 -10 2 50 49.2 48.5 16
## 7 -10 3 48.4 45.2 42 65
## 8 -10 4 41.9 39.6 37.3 47
## 9 -10 5 37.2 35.4 33.7 36
## 10 0 5 50 49.0 47.9 22
As temperature and dewpoint converge (high relative humidity), the same rounded value of temperature can occur with RH that spans as much as ~4%. Greater granularity in the training data may help address this. As relative humidity gets very low, a given dewpoint can be associated with over 5 degrees of temperature variation for the same rounded value of RH. Since the raw data has rounded RH, this may be a harder constraint, though extremely low relative humidity is uncommon so this may not be a major driver of overall RMSE
Training data is updated to include 0.2 degree granularity for temperature and dewpoint:
# Sample dataset: 0.2-degree (t, d) grid in [-50, 50] with dewpoint <= temperature,
# plus relative humidity from the known formula
rhTrain_02 <- expand.grid(t = seq(-50, 50, by = 0.2), d = seq(-50, 50, by = 0.2)) %>%
  as_tibble() %>%
  filter(d <= t) %>%
  mutate(rh = calcRH(t, d))
rhTrain_02
## # A tibble: 125,751 × 3
## t d rh
## <dbl> <dbl> <dbl>
## 1 -50 -50 100
## 2 -49.8 -50 97.7
## 3 -49.6 -50 95.5
## 4 -49.4 -50 93.4
## 5 -49.2 -50 91.2
## 6 -49 -50 89.2
## 7 -48.8 -50 87.2
## 8 -48.6 -50 85.2
## 9 -48.4 -50 83.3
## 10 -48.2 -50 81.5
## # ℹ 125,741 more rows
# Training and testing (mtry=1) - NOT weighted by real-world occurrence
# Fits a random forest (runFullRF from _v001) predicting temperature from RH and
# dewpoint on the 0.2-degree synthetic grid, scored on held-out city observations
rhOut_02 <- rhTrain_02 %>%
runFullRF(dfTrain=.,
yVar=c("t"), # response: temperature
xVars=c("rh", "d"), # predictors: relative humidity and dewpoint
isContVar=TRUE, # continuous (regression) response
refXY=TRUE,
mtry=1, # one candidate predictor per split
dfTest=allCity %>%
filter(tt=="test") %>% # held-out test rows only
select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m),
rndTo=1, # NOTE(review): presumably a rounding granularity — confirm in runFullRF (_v001)
returnData=TRUE # keep prediction frames in the returned object
)
##
## R-squared of test data is: 99.843% (RMSE 0.41 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the test-set prediction frame from the model output
rhOut_02 <- rhOut_02[["tstPred"]]
rhOut_02
## # A tibble: 182,635 × 5
## src t d rh pred
## <chr> <dbl> <dbl> <int> <dbl>
## 1 NYC -1 -1.6 96 -0.345
## 2 NYC -0.8 -1.2 97 -0.405
## 3 NYC -0.7 -1.1 97 -0.398
## 4 NYC -0.6 -1 97 -0.311
## 5 NYC 4.8 0.4 73 4.89
## 6 NYC 1.7 -0.4 86 1.73
## 7 NYC -1.8 -6.2 72 -1.60
## 8 NYC -2 -9.9 55 -2.07
## 9 NYC -3.7 -13.1 48 -3.79
## 10 NYC -8.7 -17.4 49 -8.58
## # ℹ 182,625 more rows
# Per-city error summary: mean squared error, bias, count, baseline variance,
# then derived RMSE and R-squared
rhOut_02 %>%
  group_by(src) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 0.120 -0.113 36557 125. 0.346 0.999
## 2 Houston 0.230 -0.155 36998 60.4 0.479 0.996
## 3 LA 0.205 -0.0963 36972 51.9 0.453 0.996
## 4 NYC 0.160 -0.123 35474 102. 0.400 0.998
## 5 Vegas 0.131 0.0466 36634 110. 0.362 0.999
# Error summary bucketed by relative humidity (rounded to the nearest 5),
# including each bucket's share of total squared error (e2pct)
rhOut_02 %>%
  mutate(rh5 = round(rh / 5) * 5) %>%
  group_by(rh5) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(
    rmse  = sqrt(e2),
    r2    = 1 - e2 / e2Base,
    e2pct = n * e2 / sum(n * e2)
  ) %>%
  print(n = 25)
## # A tibble: 21 × 8
## rh5 e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 9.13 -2.74 3 74.4 3.02 0.877 0.000885
## 2 5 0.887 -0.00572 2151 46.2 0.942 0.981 0.0616
## 3 10 0.273 0.0711 6179 61.0 0.523 0.996 0.0546
## 4 15 0.121 0.0351 6865 64.8 0.348 0.998 0.0268
## 5 20 0.0737 0.0802 6235 67.6 0.272 0.999 0.0149
## 6 25 0.0547 0.105 5763 76.5 0.234 0.999 0.0102
## 7 30 0.0348 0.0531 5964 92.0 0.187 1.00 0.00670
## 8 35 0.0272 0.0174 6306 105. 0.165 1.00 0.00554
## 9 40 0.0231 0.0342 7230 112. 0.152 1.00 0.00541
## 10 45 0.0253 0.0829 8165 116. 0.159 1.00 0.00667
## 11 50 0.0187 0.0565 9269 113. 0.137 1.00 0.00561
## 12 55 0.0173 0.0325 9997 114. 0.131 1.00 0.00557
## 13 60 0.0139 -0.0159 10919 112. 0.118 1.00 0.00492
## 14 65 0.0140 0.00561 11278 115. 0.118 1.00 0.00511
## 15 70 0.0153 0.0111 12057 111. 0.124 1.00 0.00595
## 16 75 0.0214 -0.0571 12812 108. 0.146 1.00 0.00887
## 17 80 0.0259 -0.0857 13248 102. 0.161 1.00 0.0111
## 18 85 0.0329 -0.0999 13982 85.8 0.181 1.00 0.0149
## 19 90 0.0641 -0.196 15304 69.7 0.253 0.999 0.0317
## 20 95 0.272 -0.374 14419 52.8 0.522 0.995 0.127
## 21 100 4.04 -1.87 4489 35.9 2.01 0.888 0.586
The model performs very well, with the exception of some remaining RMSE mainly for very high RH. Allowing both predictors (RH, D) to be used at the same time is forced using mtry=2:
# Same fit as rhOut_02 but with mtry=2, so every split may consider both
# predictors (RH and dewpoint) simultaneously
rhOut_02_mt2 <- rhTrain_02 %>%
runFullRF(dfTrain=.,
yVar=c("t"), # response: temperature
xVars=c("rh", "d"), # predictors: relative humidity and dewpoint
isContVar=TRUE, # continuous (regression) response
refXY=TRUE,
mtry=2, # both predictors available at every split
dfTest=allCity %>%
filter(tt=="test") %>% # held-out test rows only
select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m),
rndTo=1, # NOTE(review): presumably a rounding granularity — confirm in runFullRF (_v001)
returnData=TRUE # keep prediction frames in the returned object
)
## Growing trees.. Progress: 96%. Estimated remaining time: 1 seconds.
##
## R-squared of test data is: 99.962% (RMSE 0.2 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the test-set prediction frame from the model output
rhOut_02_mt2 <- rhOut_02_mt2[["tstPred"]]
rhOut_02_mt2
## # A tibble: 182,635 × 5
## src t d rh pred
## <chr> <dbl> <dbl> <int> <dbl>
## 1 NYC -1 -1.6 96 -0.996
## 2 NYC -0.8 -1.2 97 -0.820
## 3 NYC -0.7 -1.1 97 -0.815
## 4 NYC -0.6 -1 97 -0.625
## 5 NYC 4.8 0.4 73 4.80
## 6 NYC 1.7 -0.4 86 1.60
## 7 NYC -1.8 -6.2 72 -1.80
## 8 NYC -2 -9.9 55 -2.20
## 9 NYC -3.7 -13.1 48 -3.79
## 10 NYC -8.7 -17.4 49 -8.58
## # ℹ 182,625 more rows
# Per-city error summary: mean squared error, bias, count, baseline variance,
# then derived RMSE and R-squared
rhOut_02_mt2 %>%
  group_by(src) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 0.0133 0.0479 36557 125. 0.116 1.00
## 2 Houston 0.0139 0.0489 36998 60.4 0.118 1.00
## 3 LA 0.0353 0.0530 36972 51.9 0.188 0.999
## 4 NYC 0.0134 0.0510 35474 102. 0.116 1.00
## 5 Vegas 0.126 0.0675 36634 110. 0.355 0.999
# Error summary bucketed by relative humidity (rounded to the nearest 5),
# including each bucket's share of total squared error (e2pct)
rhOut_02_mt2 %>%
  mutate(rh5 = round(rh / 5) * 5) %>%
  group_by(rh5) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(
    rmse  = sqrt(e2),
    r2    = 1 - e2 / e2Base,
    e2pct = n * e2 / sum(n * e2)
  ) %>%
  print(n = 25)
## # A tibble: 21 × 8
## rh5 e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 9.37 -2.76 3 74.4 3.06 0.874 0.00379
## 2 5 0.891 -0.0136 2151 46.2 0.944 0.981 0.259
## 3 10 0.268 0.0792 6179 61.0 0.518 0.996 0.224
## 4 15 0.121 0.0835 6865 64.8 0.348 0.998 0.112
## 5 20 0.0688 0.0788 6235 67.6 0.262 0.999 0.0579
## 6 25 0.0456 0.0697 5763 76.5 0.214 0.999 0.0355
## 7 30 0.0334 0.0639 5964 92.0 0.183 1.00 0.0268
## 8 35 0.0256 0.0624 6306 105. 0.160 1.00 0.0218
## 9 40 0.0216 0.0598 7230 112. 0.147 1.00 0.0210
## 10 45 0.0187 0.0585 8165 116. 0.137 1.00 0.0205
## 11 50 0.0166 0.0552 9269 113. 0.129 1.00 0.0207
## 12 55 0.0150 0.0559 9997 114. 0.123 1.00 0.0203
## 13 60 0.0139 0.0526 10919 112. 0.118 1.00 0.0205
## 14 65 0.0128 0.0511 11278 115. 0.113 1.00 0.0195
## 15 70 0.0126 0.0527 12057 111. 0.112 1.00 0.0206
## 16 75 0.0123 0.0488 12812 108. 0.111 1.00 0.0212
## 17 80 0.0110 0.0431 13248 102. 0.105 1.00 0.0197
## 18 85 0.0122 0.0495 13982 85.8 0.110 1.00 0.0229
## 19 90 0.0110 0.0408 15304 69.7 0.105 1.00 0.0227
## 20 95 0.0130 0.0611 14419 52.8 0.114 1.00 0.0252
## 21 100 0.00787 -0.0108 4489 35.9 0.0887 1.00 0.00476
The model performs significantly better for very high RH, with the only meaningful errors at low RH where the impact of rounding (raw data RH is reported to the nearest percent) has the greatest impact
The process is run to predict RH based on temperature and dewpoint, starting with mtry=1:
# Inverse problem: predict relative humidity from temperature and dewpoint,
# using the same 0.2-degree grid, with mtry=1
predRH_02_mt1 <- rhTrain_02 %>%
runFullRF(dfTrain=.,
yVar=c("rh"), # response: relative humidity
xVars=c("t", "d"), # predictors: temperature and dewpoint
isContVar=TRUE, # continuous (regression) response
refXY=TRUE,
mtry=1, # one candidate predictor per split
dfTest=allCity %>%
filter(tt=="test") %>% # held-out test rows only
select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m),
rndTo=1, # NOTE(review): presumably a rounding granularity — confirm in runFullRF (_v001)
returnData=TRUE # keep prediction frames in the returned object
)
##
## R-squared of test data is: 99.917% (RMSE 0.75 vs. 26.1 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the test-set prediction frame from the model output
predRH_02_mt1 <- predRH_02_mt1[["tstPred"]]
predRH_02_mt1
## # A tibble: 182,635 × 5
## src t d rh pred
## <chr> <dbl> <dbl> <int> <dbl>
## 1 NYC -1 -1.6 96 93.8
## 2 NYC -0.8 -1.2 97 95.2
## 3 NYC -0.7 -1.1 97 95.2
## 4 NYC -0.6 -1 97 94.7
## 5 NYC 4.8 0.4 73 72.9
## 6 NYC 1.7 -0.4 86 85.9
## 7 NYC -1.8 -6.2 72 71.8
## 8 NYC -2 -9.9 55 54.4
## 9 NYC -3.7 -13.1 48 48.1
## 10 NYC -8.7 -17.4 49 49.6
## # ℹ 182,625 more rows
# Per-city error summary for the RH predictions: MSE, bias, count,
# baseline variance, then derived RMSE and R-squared
predRH_02_mt1 %>%
  group_by(src) %>%
  summarize(
    e2     = mean((rh - pred)^2),
    mu     = mean(rh - pred),
    n      = n(),
    e2Base = mean((rh - mean(rh))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 0.657 0.404 36557 240. 0.811 0.997
## 2 Houston 0.702 0.470 36998 341. 0.838 0.998
## 3 LA 0.652 0.323 36972 665. 0.807 0.999
## 4 NYC 0.672 0.409 35474 321. 0.820 0.998
## 5 Vegas 0.161 0.0693 36634 376. 0.402 1.00
# RH-prediction errors bucketed by reported RH (rounded to the nearest 5),
# including each bucket's share of total squared error (e2pct)
predRH_02_mt1 %>%
  mutate(rh5 = round(rh / 5) * 5) %>%
  group_by(rh5) %>%
  summarize(
    e2     = mean((rh - pred)^2),
    mu     = mean(rh - pred),
    n      = n(),
    e2Base = mean((rh - mean(rh))^2)
  ) %>%
  mutate(
    rmse  = sqrt(e2),
    r2    = 1 - e2 / e2Base,
    e2pct = n * e2 / sum(n * e2)
  ) %>%
  print(n = 25)
## # A tibble: 21 × 8
## rh5 e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.208 -0.429 3 0 0.456 -Inf 0.00000600
## 2 5 0.0972 -0.0812 2151 1.27 0.312 0.924 0.00201
## 3 10 0.0957 -0.00871 6179 1.94 0.309 0.951 0.00569
## 4 15 0.107 0.0250 6865 2.00 0.327 0.947 0.00705
## 5 20 0.109 0.0178 6235 2.01 0.330 0.946 0.00653
## 6 25 0.115 0.0279 5763 1.99 0.340 0.942 0.00641
## 7 30 0.135 0.0493 5964 2.05 0.367 0.934 0.00774
## 8 35 0.150 0.0690 6306 2.00 0.387 0.925 0.00910
## 9 40 0.158 0.0293 7230 1.98 0.398 0.920 0.0110
## 10 45 0.160 0.0107 8165 1.99 0.400 0.919 0.0126
## 11 50 0.179 0.0272 9269 2.00 0.423 0.910 0.0160
## 12 55 0.202 0.0662 9997 2.01 0.450 0.899 0.0195
## 13 60 0.235 0.136 10919 2.01 0.485 0.883 0.0247
## 14 65 0.279 0.180 11278 2.01 0.528 0.861 0.0303
## 15 70 0.294 0.170 12057 1.99 0.542 0.852 0.0341
## 16 75 0.317 0.222 12812 2.03 0.563 0.844 0.0391
## 17 80 0.406 0.330 13248 1.97 0.637 0.794 0.0518
## 18 85 0.538 0.470 13982 2.00 0.733 0.732 0.0724
## 19 90 0.834 0.727 15304 1.97 0.913 0.578 0.123
## 20 95 1.84 1.23 14419 1.93 1.36 0.0451 0.256
## 21 100 6.14 2.40 4489 0.554 2.48 -10.1 0.265
The model is inaccurate at high relative humidities but otherwise accurately predicts RH consistent with the known formula.
The process is updated to predict RH based on temperature and dewpoint with mtry=2:
# Same RH-prediction fit as predRH_02_mt1 but with mtry=2, so every split may
# consider both temperature and dewpoint simultaneously
predRH_02_mt2 <- rhTrain_02 %>%
runFullRF(dfTrain=.,
yVar=c("rh"), # response: relative humidity
xVars=c("t", "d"), # predictors: temperature and dewpoint
isContVar=TRUE, # continuous (regression) response
refXY=TRUE,
mtry=2, # both predictors available at every split
dfTest=allCity %>%
filter(tt=="test") %>% # held-out test rows only
select(src, t=temperature_2m, d=dewpoint_2m, rh=relativehumidity_2m),
rndTo=1, # NOTE(review): presumably a rounding granularity — confirm in runFullRF (_v001)
returnData=TRUE # keep prediction frames in the returned object
)
##
## R-squared of test data is: 99.97% (RMSE 0.45 vs. 26.1 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the test-set prediction frame from the model output
predRH_02_mt2 <- predRH_02_mt2[["tstPred"]]
predRH_02_mt2
## # A tibble: 182,635 × 5
## src t d rh pred
## <chr> <dbl> <dbl> <int> <dbl>
## 1 NYC -1 -1.6 96 95.7
## 2 NYC -0.8 -1.2 97 97.0
## 3 NYC -0.7 -1.1 97 97.0
## 4 NYC -0.6 -1 97 97.1
## 5 NYC 4.8 0.4 73 73.3
## 6 NYC 1.7 -0.4 86 86.5
## 7 NYC -1.8 -6.2 72 71.9
## 8 NYC -2 -9.9 55 54.4
## 9 NYC -3.7 -13.1 48 48.1
## 10 NYC -8.7 -17.4 49 49.8
## # ℹ 182,625 more rows
# Per-city error summary for the RH predictions: MSE, bias, count,
# baseline variance, then derived RMSE and R-squared
predRH_02_mt2 %>%
  group_by(src) %>%
  summarize(
    e2     = mean((rh - pred)^2),
    mu     = mean(rh - pred),
    n      = n(),
    e2Base = mean((rh - mean(rh))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 0.255 0.0224 36557 240. 0.505 0.999
## 2 Houston 0.221 0.0247 36998 341. 0.470 0.999
## 3 LA 0.197 0.0364 36972 665. 0.444 1.00
## 4 NYC 0.229 0.0228 35474 321. 0.478 0.999
## 5 Vegas 0.117 0.0269 36634 376. 0.342 1.00
# RH-prediction errors bucketed by reported RH (rounded to the nearest 5),
# including each bucket's share of total squared error (e2pct)
predRH_02_mt2 %>%
  mutate(rh5 = round(rh / 5) * 5) %>%
  group_by(rh5) %>%
  summarize(
    e2     = mean((rh - pred)^2),
    mu     = mean(rh - pred),
    n      = n(),
    e2Base = mean((rh - mean(rh))^2)
  ) %>%
  mutate(
    rmse  = sqrt(e2),
    r2    = 1 - e2 / e2Base,
    e2pct = n * e2 / sum(n * e2)
  ) %>%
  print(n = 25)
## # A tibble: 21 × 8
## rh5 e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.145 -0.344 3 0 0.381 -Inf 0.0000117
## 2 5 0.0834 -0.0197 2151 1.27 0.289 0.935 0.00482
## 3 10 0.0863 0.0137 6179 1.94 0.294 0.956 0.0143
## 4 15 0.0904 0.0286 6865 2.00 0.301 0.955 0.0167
## 5 20 0.0961 0.0328 6235 2.01 0.310 0.952 0.0161
## 6 25 0.100 0.0269 5763 1.99 0.317 0.950 0.0155
## 7 30 0.110 0.0271 5964 2.05 0.331 0.947 0.0176
## 8 35 0.115 0.0245 6306 2.00 0.340 0.942 0.0196
## 9 40 0.128 0.0227 7230 1.98 0.358 0.935 0.0250
## 10 45 0.138 0.0213 8165 1.99 0.372 0.930 0.0304
## 11 50 0.150 0.0185 9269 2.00 0.388 0.925 0.0374
## 12 55 0.166 0.0229 9997 2.01 0.407 0.918 0.0445
## 13 60 0.182 0.0204 10919 2.01 0.427 0.909 0.0534
## 14 65 0.198 0.0199 11278 2.01 0.445 0.902 0.0600
## 15 70 0.223 0.00685 12057 1.99 0.472 0.888 0.0721
## 16 75 0.239 0.00575 12812 2.03 0.489 0.882 0.0823
## 17 80 0.267 0.0121 13248 1.97 0.516 0.865 0.0950
## 18 85 0.288 0.00731 13982 2.00 0.537 0.856 0.108
## 19 90 0.295 0.0104 15304 1.97 0.543 0.850 0.121
## 20 95 0.301 0.0299 14419 1.93 0.549 0.844 0.117
## 21 100 0.403 0.393 4489 0.554 0.634 0.273 0.0486
The model is now accurate even at high relative humidities
An approximate formula for vapor pressure deficit is assessed for agreement with the data:
# Approximate formula for vapor pressure deficit (result in kPa)
# Source https://pulsegrow.com/blogs/learn/vpd
calcVPD <- function(t, d, c1=610.78, c2=17.2694, c3=237.3) {
  # Saturation vapor pressure in Pa via the approximation
  #   SVP = c1 * exp(t * c2 / (t + c3))
  # then VPD = (1 - RH/100) * SVP, divided by 1000 to convert Pa -> kPa
  svp_pa <- c1 * exp(t * c2 / (t + c3))
  unsaturated_frac <- 1 - calcRH(t, d) / 100
  unsaturated_frac * svp_pa / 1000
}
# Applied to sample data
# Scatter of reported VPD vs. formula VPD per city; the dashed line marks
# perfect agreement (y = x), the solid line a per-city linear fit
dfTestTemp_v3 %>%
select(src, t=temperature_2m, d=dewpoint_2m, v=vapor_pressure_deficit) %>%
mutate(cvpd=calcVPD(t, d)) %>% # formula-based VPD from temperature and dewpoint
ggplot(aes(x=v, y=cvpd)) +
geom_point(aes(color=src)) +
facet_wrap(~src) +
geom_smooth(method="lm") + # linear fit per panel
geom_abline(intercept=0, slope=1, lty=2) + # perfect-agreement reference
labs(x="Reported vapor pressure deficit (kPa)",
y="Formula vapor pressure deficit (kPa)",
title="Vapor pressure deficit by formula from temperature and dewpoint vs. reported in raw data") +
scale_color_discrete(NULL) # drop the legend title
## `geom_smooth()` using formula = 'y ~ x'
The formula is a strong match to the reported data, which should allow the random forest to find the correct third value when given two of T, D, VPD (provided that training space also includes that combination)
Example training data is created for all temperatures and dew points between -50 and 50 (rounded to the nearest 1), with VPD calculated based on formula:
# Sample dataset: integer (t, d) grid in [-50, 50] with dewpoint <= temperature,
# plus vapor pressure deficit from the approximate formula
rhTrainVPD <- expand.grid(t = seq(-50, 50, by = 1), d = seq(-50, 50, by = 1)) %>%
  as_tibble() %>%
  filter(d <= t) %>%
  mutate(vpd = calcVPD(t, d))
rhTrainVPD
## # A tibble: 5,151 × 3
## t d vpd
## <dbl> <dbl> <dbl>
## 1 -50 -50 0
## 2 -49 -50 0.000738
## 3 -48 -50 0.00156
## 4 -47 -50 0.00247
## 5 -46 -50 0.00348
## 6 -45 -50 0.00461
## 7 -44 -50 0.00585
## 8 -43 -50 0.00722
## 9 -42 -50 0.00874
## 10 -41 -50 0.0104
## # ℹ 5,141 more rows
# Training and testing (mtry=1)
# Predict temperature from VPD and dewpoint on the synthetic grid
rhOutVPD <- rhTrainVPD %>%
bind_rows(.,.,.,.,.,.,.,.,.,.) %>% # stack 10 copies of the grid (NOTE(review): presumably to enlarge the training sample — confirm intent)
runFullRF(dfTrain=.,
yVar=c("t"), # response: temperature
xVars=c("vpd", "d"), # predictors: vapor pressure deficit and dewpoint
isContVar=TRUE, # continuous (regression) response
refXY=TRUE,
mtry=1, # one candidate predictor per split
dfTest=allCity %>%
filter(tt=="test") %>% # held-out test rows only
select(src, t=temperature_2m, d=dewpoint_2m, vpd=vapor_pressure_deficit),
rndTo=1, # NOTE(review): presumably a rounding granularity — confirm in runFullRF (_v001)
returnData=TRUE # keep prediction frames in the returned object
)
##
## R-squared of test data is: 99.667% (RMSE 0.6 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the test-set prediction frame from the model output
rhOutVPD <- rhOutVPD[["tstPred"]]
rhOutVPD
## # A tibble: 182,635 × 5
## src t d vpd pred
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 NYC -1 -1.6 0.02 -2.30
## 2 NYC -0.8 -1.2 0.02 -1.54
## 3 NYC -0.7 -1.1 0.02 -1.54
## 4 NYC -0.6 -1 0.02 -1.54
## 5 NYC 4.8 0.4 0.23 4.53
## 6 NYC 1.7 -0.4 0.1 2.81
## 7 NYC -1.8 -6.2 0.15 -1.17
## 8 NYC -2 -9.9 0.24 -1.74
## 9 NYC -3.7 -13.1 0.24 -3.20
## 10 NYC -8.7 -17.4 0.16 -8.20
## # ℹ 182,625 more rows
# Per-city error summary: mean squared error, bias, count, baseline variance,
# then derived RMSE and R-squared
rhOutVPD %>%
  group_by(src) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 0.291 -0.120 36557 125. 0.539 0.998
## 2 Houston 0.720 0.161 36998 60.4 0.849 0.988
## 3 LA 0.354 0.0313 36972 51.9 0.595 0.993
## 4 NYC 0.362 -0.0388 35474 102. 0.601 0.996
## 5 Vegas 0.0710 -0.0715 36634 110. 0.267 0.999
# Error summary bucketed by VPD, with buckets that coarsen as VPD grows:
# nearest 0.05 below 0.4, nearest 0.2 below 2, nearest 1 above
rhOutVPD %>%
  mutate(
    vpd_rnd = ifelse(vpd < 0.4, round(vpd * 20) / 20,
                     ifelse(vpd < 2, round(vpd * 5) / 5,
                            round(vpd / 1) * 1))
  ) %>%
  group_by(vpd_rnd) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(
    rmse  = sqrt(e2),
    r2    = 1 - e2 / e2Base,
    e2pct = n * e2 / sum(n * e2)
  ) %>%
  print(n = 30)
## # A tibble: 24 × 8
## vpd_rnd e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 2.29 0.746 3081 51.6 1.51 0.956 0.107
## 2 0.05 2.41 0.791 10402 105. 1.55 0.977 0.381
## 3 0.1 1.28 0.0988 12935 94.8 1.13 0.986 0.253
## 4 0.15 0.371 -0.0862 11789 89.5 0.609 0.996 0.0665
## 5 0.2 0.190 -0.153 10651 83.5 0.436 0.998 0.0308
## 6 0.25 0.122 -0.162 9233 74.7 0.349 0.998 0.0171
## 7 0.3 0.114 -0.107 8018 66.5 0.338 0.998 0.0139
## 8 0.35 0.110 -0.110 6911 59.1 0.331 0.998 0.0115
## 9 0.4 0.0856 -0.0950 15064 50.3 0.293 0.998 0.0196
## 10 0.6 0.0778 -0.0188 16435 41.9 0.279 0.998 0.0194
## 11 0.8 0.0630 0.00694 13819 36.6 0.251 0.998 0.0132
## 12 1 0.134 -0.251 9892 31.8 0.366 0.996 0.0201
## 13 1.2 0.0941 -0.217 8747 28.5 0.307 0.997 0.0125
## 14 1.4 0.0533 -0.119 6520 24.7 0.231 0.998 0.00528
## 15 1.6 0.0420 -0.0535 5911 21.4 0.205 0.998 0.00378
## 16 1.8 0.0542 0.0862 4359 18.9 0.233 0.997 0.00359
## 17 2 0.0723 0.0217 10158 14.5 0.269 0.995 0.0112
## 18 3 0.0431 -0.0900 9263 9.29 0.207 0.995 0.00606
## 19 4 0.0370 -0.0382 4687 4.41 0.192 0.992 0.00264
## 20 5 0.0300 0.0323 2502 2.02 0.173 0.985 0.00114
## 21 6 0.0239 0.0171 1415 1.23 0.155 0.981 0.000514
## 22 7 0.0427 0.137 605 0.766 0.207 0.944 0.000393
## 23 8 0.0517 0.176 205 0.547 0.227 0.906 0.000161
## 24 9 0.0975 0.266 33 0.288 0.312 0.661 0.0000489
Training data rounds temperature to the nearest degree, making temperature predictions commonly off by a fraction of a degree. The model is generally accurate, with the exception of very low/high vapor pressure deficits.
The model is updated to use mtry=2:
# Training and testing (mtry=2)
# Same VPD-based fit but every split may consider both predictors
rhOutVPD_mt2 <- rhTrainVPD %>%
bind_rows(.,.,.,.,.,.,.,.,.,.) %>% # stack 10 copies of the grid (NOTE(review): presumably to enlarge the training sample — confirm intent)
runFullRF(dfTrain=.,
yVar=c("t"), # response: temperature
xVars=c("vpd", "d"), # predictors: vapor pressure deficit and dewpoint
isContVar=TRUE, # continuous (regression) response
refXY=TRUE,
mtry=2, # both predictors available at every split
dfTest=allCity %>%
filter(tt=="test") %>% # held-out test rows only
select(src, t=temperature_2m, d=dewpoint_2m, vpd=vapor_pressure_deficit),
rndTo=2, # NOTE(review): presumably a rounding granularity (differs from the mtry=1 run's rndTo=1) — confirm in runFullRF
returnData=TRUE # keep prediction frames in the returned object
)
##
## R-squared of test data is: 99.931% (RMSE 0.27 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the test-set prediction frame from the model output
rhOutVPD_mt2 <- rhOutVPD_mt2[["tstPred"]]
rhOutVPD_mt2
## # A tibble: 182,635 × 5
## src t d vpd pred
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 NYC -1 -1.6 0.02 -2.06
## 2 NYC -0.8 -1.2 0.02 -1
## 3 NYC -0.7 -1.1 0.02 -1
## 4 NYC -0.6 -1 0.02 -1
## 5 NYC 4.8 0.4 0.23 4.54
## 6 NYC 1.7 -0.4 0.1 2.44
## 7 NYC -1.8 -6.2 0.15 -1.54
## 8 NYC -2 -9.9 0.24 -2.19
## 9 NYC -3.7 -13.1 0.24 -3.82
## 10 NYC -8.7 -17.4 0.16 -8.58
## # ℹ 182,625 more rows
# Per-city error summary: mean squared error, bias, count, baseline variance,
# then derived RMSE and R-squared
rhOutVPD_mt2 %>%
  group_by(src) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 0.0807 0.0556 36557 125. 0.284 0.999
## 2 Houston 0.0930 0.0725 36998 60.4 0.305 0.998
## 3 LA 0.0664 0.0503 36972 51.9 0.258 0.999
## 4 NYC 0.0804 0.0616 35474 102. 0.284 0.999
## 5 Vegas 0.0527 0.00782 36634 110. 0.230 1.00
# Error summary bucketed by VPD, with buckets that coarsen as VPD grows:
# nearest 0.05 below 0.4, nearest 0.2 below 2, nearest 1 above
rhOutVPD_mt2 %>%
  mutate(
    vpd_rnd = ifelse(vpd < 0.4, round(vpd * 20) / 20,
                     ifelse(vpd < 2, round(vpd * 5) / 5,
                            round(vpd / 1) * 1))
  ) %>%
  group_by(vpd_rnd) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(
    rmse  = sqrt(e2),
    r2    = 1 - e2 / e2Base,
    e2pct = n * e2 / sum(n * e2)
  ) %>%
  print(n = 30)
## # A tibble: 24 × 8
## vpd_rnd e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.135 0.189 3081 51.6 0.368 0.997 0.0306
## 2 0.05 0.176 0.211 10402 105. 0.420 0.998 0.134
## 3 0.1 0.127 0.0873 12935 94.8 0.356 0.999 0.120
## 4 0.15 0.114 0.0702 11789 89.5 0.337 0.999 0.0985
## 5 0.2 0.0873 0.0103 10651 83.5 0.295 0.999 0.0682
## 6 0.25 0.0769 0.0377 9233 74.7 0.277 0.999 0.0521
## 7 0.3 0.0769 0.0509 8018 66.5 0.277 0.999 0.0452
## 8 0.35 0.0741 0.0363 6911 59.1 0.272 0.999 0.0376
## 9 0.4 0.0625 0.0319 15064 50.3 0.250 0.999 0.0690
## 10 0.6 0.0525 0.0446 16435 41.9 0.229 0.999 0.0634
## 11 0.8 0.0474 0.0339 13819 36.6 0.218 0.999 0.0480
## 12 1 0.0445 0.0357 9892 31.8 0.211 0.999 0.0323
## 13 1.2 0.0379 0.0244 8747 28.5 0.195 0.999 0.0243
## 14 1.4 0.0341 0.0283 6520 24.7 0.185 0.999 0.0163
## 15 1.6 0.0351 0.0364 5911 21.4 0.187 0.998 0.0152
## 16 1.8 0.0472 0.0317 4359 18.9 0.217 0.998 0.0151
## 17 2 0.0426 0.0236 10158 14.5 0.206 0.997 0.0318
## 18 3 0.0588 0.0210 9263 9.29 0.242 0.994 0.0399
## 19 4 0.0723 0.00279 4687 4.41 0.269 0.984 0.0249
## 20 5 0.0849 -0.0209 2502 2.02 0.291 0.958 0.0156
## 21 6 0.0986 0.00277 1415 1.23 0.314 0.920 0.0102
## 22 7 0.112 0.0000231 605 0.766 0.335 0.853 0.00499
## 23 8 0.104 0.0297 205 0.547 0.323 0.810 0.00157
## 24 9 0.233 0.170 33 0.288 0.483 0.190 0.000564
The model is more accurate, particularly for very low vapor pressure deficits
Training data is updated to include 0.2 degree granularity for temperature and dewpoint:
# Sample dataset: 0.2-degree (t, d) grid in [-50, 50] with dewpoint <= temperature,
# plus vapor pressure deficit from the approximate formula
rhTrainVPD_02 <- expand.grid(t = seq(-50, 50, by = 0.2), d = seq(-50, 50, by = 0.2)) %>%
  as_tibble() %>%
  filter(d <= t) %>%
  mutate(vpd = calcVPD(t, d))
rhTrainVPD_02
## # A tibble: 125,751 × 3
## t d vpd
## <dbl> <dbl> <dbl>
## 1 -50 -50 0
## 2 -49.8 -50 0.000141
## 3 -49.6 -50 0.000286
## 4 -49.4 -50 0.000433
## 5 -49.2 -50 0.000584
## 6 -49 -50 0.000738
## 7 -48.8 -50 0.000895
## 8 -48.6 -50 0.00106
## 9 -48.4 -50 0.00122
## 10 -48.2 -50 0.00139
## # ℹ 125,741 more rows
# Training and testing (mtry=2)
# Fine-grained (0.2-degree) VPD grid with both predictors per split; the most
# accurate — and slowest-to-train — configuration in this exploration
rhOutVPD_02_mt2 <- rhTrainVPD_02 %>%
bind_rows(.,.,.,.,.,.,.,.,.,.) %>% # stack 10 copies of the grid (NOTE(review): presumably to enlarge the training sample — confirm intent)
runFullRF(dfTrain=.,
yVar=c("t"), # response: temperature
xVars=c("vpd", "d"), # predictors: vapor pressure deficit and dewpoint
isContVar=TRUE, # continuous (regression) response
refXY=TRUE,
mtry=2, # both predictors available at every split
dfTest=allCity %>%
filter(tt=="test") %>% # held-out test rows only
select(src, t=temperature_2m, d=dewpoint_2m, vpd=vapor_pressure_deficit),
rndTo=2, # NOTE(review): presumably a rounding granularity — confirm in runFullRF (_v001)
returnData=TRUE # keep prediction frames in the returned object
)
## Growing trees.. Progress: 8%. Estimated remaining time: 5 minutes, 58 seconds.
## Growing trees.. Progress: 18%. Estimated remaining time: 5 minutes, 14 seconds.
## Growing trees.. Progress: 25%. Estimated remaining time: 5 minutes, 3 seconds.
## Growing trees.. Progress: 32%. Estimated remaining time: 4 minutes, 31 seconds.
## Growing trees.. Progress: 41%. Estimated remaining time: 3 minutes, 56 seconds.
## Growing trees.. Progress: 49%. Estimated remaining time: 3 minutes, 18 seconds.
## Growing trees.. Progress: 57%. Estimated remaining time: 2 minutes, 47 seconds.
## Growing trees.. Progress: 66%. Estimated remaining time: 2 minutes, 12 seconds.
## Growing trees.. Progress: 74%. Estimated remaining time: 1 minute, 40 seconds.
## Growing trees.. Progress: 82%. Estimated remaining time: 1 minute, 11 seconds.
## Growing trees.. Progress: 91%. Estimated remaining time: 36 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 4 seconds.
##
## R-squared of test data is: 99.995% (RMSE 0.07 vs. 10.39 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the test-set prediction frame from the model output
rhOutVPD_02_mt2 <- rhOutVPD_02_mt2[["tstPred"]]
rhOutVPD_02_mt2
## # A tibble: 182,635 × 5
## src t d vpd pred
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 NYC -1 -1.6 0.02 -1.11
## 2 NYC -0.8 -1.2 0.02 -0.700
## 3 NYC -0.7 -1.1 0.02 -0.699
## 4 NYC -0.6 -1 0.02 -0.490
## 5 NYC 4.8 0.4 0.23 4.80
## 6 NYC 1.7 -0.4 0.1 1.70
## 7 NYC -1.8 -6.2 0.15 -1.80
## 8 NYC -2 -9.9 0.24 -2.00
## 9 NYC -3.7 -13.1 0.24 -3.79
## 10 NYC -8.7 -17.4 0.16 -8.72
## # ℹ 182,625 more rows
# Per-city error summary: mean squared error, bias, count, baseline variance,
# then derived RMSE and R-squared
rhOutVPD_02_mt2 %>%
  group_by(src) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 0.00844 0.0298 36557 125. 0.0919 1.00
## 2 Houston 0.00585 0.0364 36998 60.4 0.0765 1.00
## 3 LA 0.00441 0.0264 36972 51.9 0.0664 1.00
## 4 NYC 0.00674 0.0329 35474 102. 0.0821 1.00
## 5 Vegas 0.00242 -0.00111 36634 110. 0.0492 1.00
# Errors by VPD: bucket VPD on a coarsening grid (0.05 steps below 0.4,
# 0.2 steps below 2, whole units above), then summarize errors per bucket
rhOutVPD_02_mt2 %>%
  mutate(vpd_rnd = case_when(
    vpd < 0.4 ~ round(vpd * 20) / 20,
    vpd < 2   ~ round(vpd * 5) / 5,
    TRUE      ~ round(vpd)
  )) %>%
  group_by(vpd_rnd) %>%
  summarize(
    e2     = mean((t - pred)^2),
    mu     = mean(t - pred),
    n      = n(),
    e2Base = mean((t - mean(t))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
  print(n = 30)
## # A tibble: 24 × 8
## vpd_rnd e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.0157 0.0784 3081 51.6 0.125 1.00 0.0477
## 2 0.05 0.0136 0.0436 10402 105. 0.117 1.00 0.139
## 3 0.1 0.0103 0.0386 12935 94.8 0.101 1.00 0.131
## 4 0.15 0.00893 0.0353 11789 89.5 0.0945 1.00 0.104
## 5 0.2 0.00783 0.0329 10651 83.5 0.0885 1.00 0.0821
## 6 0.25 0.00689 0.0305 9233 74.7 0.0830 1.00 0.0626
## 7 0.3 0.00621 0.0292 8018 66.5 0.0788 1.00 0.0490
## 8 0.35 0.00570 0.0291 6911 59.1 0.0755 1.00 0.0388
## 9 0.4 0.00508 0.0273 15064 50.3 0.0713 1.00 0.0754
## 10 0.6 0.00436 0.0247 16435 41.9 0.0661 1.00 0.0706
## 11 0.8 0.00364 0.0216 13819 36.6 0.0603 1.00 0.0495
## 12 1 0.00322 0.0186 9892 31.8 0.0568 1.00 0.0314
## 13 1.2 0.00294 0.0173 8747 28.5 0.0542 1.00 0.0253
## 14 1.4 0.00264 0.0169 6520 24.7 0.0514 1.00 0.0169
## 15 1.6 0.00259 0.0159 5911 21.4 0.0509 1.00 0.0151
## 16 1.8 0.00243 0.0149 4359 18.9 0.0493 1.00 0.0104
## 17 2 0.00210 0.0119 10158 14.5 0.0458 1.00 0.0210
## 18 3 0.00164 0.00576 9263 9.29 0.0405 1.00 0.0150
## 19 4 0.00143 -0.000393 4687 4.41 0.0378 1.00 0.00658
## 20 5 0.00163 -0.00335 2502 2.02 0.0404 0.999 0.00402
## 21 6 0.00212 -0.0104 1415 1.23 0.0460 0.998 0.00295
## 22 7 0.00245 -0.0137 605 0.766 0.0495 0.997 0.00146
## 23 8 0.00236 -0.0116 205 0.547 0.0486 0.996 0.000477
## 24 9 0.00263 -0.0169 33 0.288 0.0513 0.991 0.0000855
Predictions become extremely accurate, at the expense of long run times
The model is run to predict VPD as f(T, D):
# Training and testing (mtry=2)
# Fit a random forest predicting vapor pressure deficit (vpd) from temperature
# (t) and dewpoint (d), using both predictors at each split (mtry=2 of 2).
# Test rows come from the allCity holdout (tt=="test"), with columns renamed
# to match the training data. returnData=TRUE keeps the per-row test
# predictions (tstPred) in the returned object for the error analyses below.
# NOTE(review): rndTo=0.025 presumably controls rounding inside runFullRF
# (defined in _v001) -- confirm its exact semantics there.
predVPD_02_mt2 <- rhTrainVPD_02 %>%
# bind_rows(.,.,.,.,.,.,.,.,.,.) %>% # (disabled) would stack 10 copies of the training data
runFullRF(dfTrain=.,
yVar=c("vpd"),
xVars=c("t", "d"),
isContVar=TRUE,
refXY=TRUE,
mtry=2,
dfTest=allCity %>%
filter(tt=="test") %>%
select(src, t=temperature_2m, d=dewpoint_2m, vpd=vapor_pressure_deficit),
rndTo=0.025,
returnData=TRUE
)
##
## R-squared of test data is: 99.991% (RMSE 0.01 vs. 1.2 null)
## `geom_smooth()` using formula = 'y ~ x'
# Retain only the per-row test-set predictions from the fitted model object
predVPD_02_mt2 <- predVPD_02_mt2[["tstPred"]]
predVPD_02_mt2
## # A tibble: 182,635 × 5
## src t d vpd pred
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 NYC -1 -1.6 0.02 0.0247
## 2 NYC -0.8 -1.2 0.02 0.0167
## 3 NYC -0.7 -1.1 0.02 0.0167
## 4 NYC -0.6 -1 0.02 0.0178
## 5 NYC 4.8 0.4 0.23 0.232
## 6 NYC 1.7 -0.4 0.1 0.0931
## 7 NYC -1.8 -6.2 0.15 0.151
## 8 NYC -2 -9.9 0.24 0.241
## 9 NYC -3.7 -13.1 0.24 0.239
## 10 NYC -8.7 -17.4 0.16 0.158
## # ℹ 182,625 more rows
# Errors by city: per-source squared error (e2), bias (mu), row count, and
# null (mean-only) squared error, then derived RMSE and R-squared
predVPD_02_mt2 %>%
  group_by(src) %>%
  summarize(
    e2     = mean((vpd - pred)^2),
    mu     = mean(vpd - pred),
    n      = n(),
    e2Base = mean((vpd - mean(vpd))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base)
## # A tibble: 5 × 7
## src e2 mu n e2Base rmse r2
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Chicago 0.0000808 0.00126 36557 0.248 0.00899 1.00
## 2 Houston 0.000159 0.00166 36998 0.590 0.0126 1.00
## 3 LA 0.000113 0.00305 36972 1.07 0.0106 1.00
## 4 NYC 0.0000790 0.00119 35474 0.298 0.00889 1.00
## 5 Vegas 0.000236 0.00865 36634 2.93 0.0154 1.00
# Errors by VPD: bucket VPD on a coarsening grid (0.05 steps below 0.4,
# 0.2 steps below 2, whole units above), then summarize errors per bucket
predVPD_02_mt2 %>%
  mutate(vpd_rnd = case_when(
    vpd < 0.4 ~ round(vpd * 20) / 20,
    vpd < 2   ~ round(vpd * 5) / 5,
    TRUE      ~ round(vpd)
  )) %>%
  group_by(vpd_rnd) %>%
  summarize(
    e2     = mean((vpd - pred)^2),
    mu     = mean(vpd - pred),
    n      = n(),
    e2Base = mean((vpd - mean(vpd))^2)
  ) %>%
  mutate(rmse = sqrt(e2), r2 = 1 - e2 / e2Base, e2pct = n * e2 / sum(n * e2)) %>%
  print(n = 30)
## # A tibble: 24 × 8
## vpd_rnd e2 mu n e2Base rmse r2 e2pct
## <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.0000998 -0.00744 3081 0.0000547 0.00999 -0.825 0.0126
## 2 0.05 0.0000464 -0.000706 10402 0.000188 0.00681 0.753 0.0197
## 3 0.1 0.0000565 0.000282 12935 0.000201 0.00751 0.719 0.0298
## 4 0.15 0.0000639 0.000490 11789 0.000199 0.00800 0.678 0.0308
## 5 0.2 0.0000671 0.000651 10651 0.000201 0.00819 0.666 0.0292
## 6 0.25 0.0000692 0.000996 9233 0.000201 0.00832 0.655 0.0261
## 7 0.3 0.0000748 0.00125 8018 0.000201 0.00865 0.627 0.0245
## 8 0.35 0.0000798 0.00126 6911 0.000201 0.00893 0.603 0.0225
## 9 0.4 0.0000838 0.00166 15064 0.00138 0.00915 0.939 0.0515
## 10 0.6 0.0000914 0.00208 16435 0.00299 0.00956 0.969 0.0614
## 11 0.8 0.000105 0.00276 13819 0.00364 0.0102 0.971 0.0592
## 12 1 0.000115 0.00342 9892 0.00302 0.0107 0.962 0.0465
## 13 1.2 0.000130 0.00398 8747 0.00371 0.0114 0.965 0.0464
## 14 1.4 0.000142 0.00442 6520 0.00298 0.0119 0.952 0.0379
## 15 1.6 0.000160 0.00482 5911 0.00368 0.0126 0.957 0.0385
## 16 1.8 0.000182 0.00616 4359 0.00302 0.0135 0.940 0.0324
## 17 2 0.000214 0.00708 10158 0.0309 0.0146 0.993 0.0887
## 18 3 0.000293 0.00969 9263 0.0802 0.0171 0.996 0.111
## 19 4 0.000431 0.0133 4687 0.0813 0.0208 0.995 0.0824
## 20 5 0.000604 0.0166 2502 0.0808 0.0246 0.993 0.0617
## 21 6 0.000823 0.0204 1415 0.0777 0.0287 0.989 0.0475
## 22 7 0.00108 0.0239 605 0.0774 0.0329 0.986 0.0267
## 23 8 0.00135 0.0274 205 0.0672 0.0368 0.980 0.0113
## 24 9 0.00152 0.0282 33 0.0486 0.0390 0.969 0.00205
Predictions are very accurate, as expected
A model is run to predict cloud cover, at first allowing the cloud subset data (low, mid, high):
# Label reused by runFullRF in its printed accuracy summary (and, via useSub,
# presumably as a plot subtitle -- confirm against runFullRF in _v001)
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Predict total cloud cover from every training variable except those ending
# in "cloudcover" (the target itself); the low/mid/high sub-type covers do
# NOT match the "cloudcover$" pattern and so remain available as predictors.
# NOTE(review): rndTo=-1L presumably means "round to tens" as in base round()
# with negative digits -- confirm against runFullRF's definition.
rfCloudFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="cloudcover",
xVars=c(varsTrain[!str_detect(varsTrain, "cloudcover$")]),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
rndTo=-1L,
refXY=TRUE,
returnData=TRUE
)
## Growing trees.. Progress: 17%. Estimated remaining time: 2 minutes, 33 seconds.
## Growing trees.. Progress: 34%. Estimated remaining time: 1 minute, 59 seconds.
## Growing trees.. Progress: 51%. Estimated remaining time: 1 minute, 29 seconds.
## Growing trees.. Progress: 68%. Estimated remaining time: 58 seconds.
## Growing trees.. Progress: 83%. Estimated remaining time: 31 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.481% (RMSE 2.61 vs. 36.27 null)
## `geom_smooth()` using formula = 'y ~ x'
The model is effective at predicting cloud cover. Of interest, variable importance is highest for ‘weathercode’, a categorical variable improperly run as an integer. The interpretation of WMO weather codes is available at https://www.nodc.noaa.gov/archive/arc0021/0002199/1.1/data/0-data/HTML/WMO-CODE/WMO4677.HTM
The weather code and cloud cover variables are explored:
# Distribution of 'cloudcover': histogram scaled to proportions of all rows.
# (weathercode is carried along as a factor for parity with the plots below,
# though this particular plot does not map it.)
allCity %>%
  mutate(weathercode = factor(weathercode)) %>%
  select(weathercode, cloudcover) %>%
  ggplot(aes(x = cloudcover)) +
  geom_histogram(aes(y = after_stat(count) / sum(after_stat(count))), bins = 50) +
  labs(title = "Distribution of cloud cover", y = "Proportion of total observations")
# Distribution of 'weathercode': bar chart of the WMO code as a factor,
# scaled to proportions of all rows
allCity %>%
  mutate(weathercode = factor(weathercode)) %>%
  select(weathercode, cloudcover) %>%
  ggplot(aes(x = weathercode)) +
  geom_bar(aes(y = after_stat(count) / sum(after_stat(count)))) +
  labs(title = "Distribution of weather code", y = "Proportion of total observations")
# Cloud cover boxplot by weather code: one box per WMO code level
allCity %>%
  mutate(weathercode = factor(weathercode)) %>%
  select(weathercode, cloudcover) %>%
  ggplot(aes(x = weathercode, y = cloudcover)) +
  geom_boxplot(fill = "lightblue") +
  labs(title = "Cloud cover by weather code", y = "Cloud cover")
Weather code is strongly predictive of cloud cover, with codes 00 and 01 associated with few clouds and other codes associated with many clouds
The model is run to predict cloud cover using only weather code as a factor:
# Label reused by runFullRF in its printed accuracy summary
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Predict total cloud cover from the WMO weather code ALONE, treated as a
# factor (fct_wmo) rather than as an integer. returnData=FALSE: only the
# printed accuracy summary is of interest here, not the fitted object.
runFullRF(dfTrain=allCity %>% mutate(fct_wmo=factor(weathercode)) %>% filter(tt=="train", year<2022),
yVar="cloudcover",
xVars=c("fct_wmo"),
dfTest=allCity %>% mutate(fct_wmo=factor(weathercode)) %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
rndTo=-1L,
refXY=TRUE,
returnData=FALSE
)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.458% (RMSE 9.96 vs. 36.27 null)
## `geom_smooth()` using formula = 'y ~ x'
The model drives over 90% R-squared, with RMSE falling from ~36 in the baseline to ~10 with predictions based solely on weather code
The model is run to predict cloud cover using only the three cloud cover (low, mid, high) predictors:
# Label reused by runFullRF in its printed accuracy summary
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Predict total cloud cover from ONLY the low/mid/high sub-type covers
# (training variables matching "cloudcover_"). Note fct_wmo is created in
# both data frames (mirroring the previous chunk) but is not listed in xVars,
# so it plays no role in this model.
runFullRF(dfTrain=allCity %>% mutate(fct_wmo=factor(weathercode)) %>% filter(tt=="train", year<2022),
yVar="cloudcover",
xVars=c(varsTrain[str_detect(varsTrain, pattern="cloudcover_")]),
dfTest=allCity %>% mutate(fct_wmo=factor(weathercode)) %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
rndTo=-1L,
refXY=TRUE,
returnData=FALSE
)
## Growing trees.. Progress: 93%. Estimated remaining time: 2 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.129% (RMSE 3.39 vs. 36.27 null)
## `geom_smooth()` using formula = 'y ~ x'
The model drives over 99% R-squared, with RMSE falling from ~36 in the baseline to ~3 with predictions based solely on cloud cover sub-types
All combinations of two variables are explored for predicting cloud cover on a smaller training dataset:
# Train and test data: pre-2022 training rows vs. the 2022 holdout, each with
# the source city available as a factor
dfTrainCloud <- allCity %>%
  filter(tt == "train", year < 2022) %>%
  mutate(fct_src = factor(src))
dfTestCloud <- allCity %>%
  filter(tt == "test", year == 2022) %>%
  mutate(fct_src = factor(src))
# Variables to explore: everything that is not itself a cloud-cover measure
# (names ending in "cover"), plus month and time-of-day
possCloudVars <- c(varsTrain[!str_detect(varsTrain, "cover$")], "month", "tod")
# Subsets to use: a fixed, reproducible 5,000-row sample of the training data
set.seed(24080616)
idxSmallCloud <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace = FALSE)
# Evaluate every unordered pair of candidate predictors with a small
# (5,000-row) random forest and record the holdout R-squared for each pair.
# Result: one row per pair, columns (index 1, index 2, test R^2).
# The matrix is preallocated (choose(n, 2) rows) instead of being grown with
# rbind() inside the loop, which would copy the whole matrix on every append;
# seq_len() also avoids the 1:(n-1) backwards-sequence bug when fewer than
# two candidate variables are present.
mtxSmallCloud <- matrix(NA_real_, nrow=choose(length(possCloudVars), 2), ncol=3)
rowCloud <- 0L
for(idx1 in seq_len(length(possCloudVars) - 1L)) {
for(idx2 in (idx1 + 1L):length(possCloudVars)) {
r2SmallCloud <- runFullRF(dfTrain=dfTrainCloud[idxSmallCloud,],
yVar="cloudcover",
xVars=possCloudVars[c(idx1, idx2)],
dfTest=dfTestCloud,
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
makePlots=FALSE,
returnData=TRUE
)[["rfAcc"]][["r2"]]
rowCloud <- rowCloud + 1L
mtxSmallCloud[rowCloud, ] <- c(idx1, idx2, r2SmallCloud)
}
}
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.345% (RMSE 36.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.025% (RMSE 30.99 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.405% (RMSE 36.53 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.724% (RMSE 36.76 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.72% (RMSE 36.94 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.649% (RMSE 35.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.495% (RMSE 31.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.401% (RMSE 32.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.944% (RMSE 35.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.722% (RMSE 21.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.433% (RMSE 26.3 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.782% (RMSE 32.29 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.022% (RMSE 33.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.673% (RMSE 32.1 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.009% (RMSE 30.78 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.063% (RMSE 34.59 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.179% (RMSE 37.02 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.091% (RMSE 36.65 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.492% (RMSE 37.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.869% (RMSE 37.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.03% (RMSE 36.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.382% (RMSE 33.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.079% (RMSE 10.21 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.003% (RMSE 32.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.298% (RMSE 36.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.628% (RMSE 36.16 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.948% (RMSE 36.1 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.571% (RMSE 35.99 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.649% (RMSE 35.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.323% (RMSE 35.85 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.287% (RMSE 36.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.867% (RMSE 35.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.672% (RMSE 36.4 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.325% (RMSE 37.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.705% (RMSE 36.15 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.183% (RMSE 36.31 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.577% (RMSE 32.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.12% (RMSE 33.22 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.115% (RMSE 36.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.732% (RMSE 36.77 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.539% (RMSE 35.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.049% (RMSE 31.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.706% (RMSE 31.68 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.775% (RMSE 35.21 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.152% (RMSE 21.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.784% (RMSE 26.21 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.941% (RMSE 31.64 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.59% (RMSE 35.62 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.587% (RMSE 35.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.386% (RMSE 33.37 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.564% (RMSE 34.69 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.489% (RMSE 37.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.003% (RMSE 37.35 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.142% (RMSE 37.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.268% (RMSE 37.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.09% (RMSE 37.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.762% (RMSE 36.14 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.533% (RMSE 9.22 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.541% (RMSE 32.94 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.541% (RMSE 37.62 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.843% (RMSE 36.43 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.273% (RMSE 36.5 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.834% (RMSE 36.61 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.293% (RMSE 35.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.787% (RMSE 36.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.527% (RMSE 36.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.007% (RMSE 36.09 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.155% (RMSE 36.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.982% (RMSE 37.52 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.157% (RMSE 35.51 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.055% (RMSE 35.53 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.849% (RMSE 32.88 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.019% (RMSE 32.84 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.093% (RMSE 32.63 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.267% (RMSE 32.39 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.348% (RMSE 29.39 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.912% (RMSE 29.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.237% (RMSE 31.78 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.126% (RMSE 21.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.464% (RMSE 23.38 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.647% (RMSE 28.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.742% (RMSE 31.47 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.515% (RMSE 31.72 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.541% (RMSE 30.66 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.347% (RMSE 30.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.086% (RMSE 31.4 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.758% (RMSE 32.09 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.576% (RMSE 33.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.681% (RMSE 33.31 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.356% (RMSE 30.92 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.242% (RMSE 31.36 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.148% (RMSE 9.5 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.576% (RMSE 32.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.743% (RMSE 32.9 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.306% (RMSE 32.38 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.493% (RMSE 32.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.841% (RMSE 32.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.275% (RMSE 32.79 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.518% (RMSE 32.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.731% (RMSE 32.9 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.197% (RMSE 32.81 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.85% (RMSE 32.07 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.915% (RMSE 32.46 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.717% (RMSE 31.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.374% (RMSE 31.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.137% (RMSE 32.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.88% (RMSE 37.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.174% (RMSE 36.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.033% (RMSE 31.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.253% (RMSE 32.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.317% (RMSE 34.92 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.043% (RMSE 21.45 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.864% (RMSE 26.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.498% (RMSE 32.14 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.971% (RMSE 35.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.283% (RMSE 34.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.339% (RMSE 33.38 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.123% (RMSE 35.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.242% (RMSE 37.21 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.438% (RMSE 36.89 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.788% (RMSE 38.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.494% (RMSE 37.96 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.133% (RMSE 37.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.069% (RMSE 35.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.105% (RMSE 9.52 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.421% (RMSE 32.96 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.061% (RMSE 33.63 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.767% (RMSE 32.89 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.838% (RMSE 33.87 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.325% (RMSE 35.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.654% (RMSE 35.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.303% (RMSE 36.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.051% (RMSE 36.64 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.524% (RMSE 36.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.981% (RMSE 36.1 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.638% (RMSE 35.79 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.424% (RMSE 34.14 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.443% (RMSE 35.64 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.391% (RMSE 37.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.779% (RMSE 35.58 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.118% (RMSE 31.39 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.88% (RMSE 31.86 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.895% (RMSE 35.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.925% (RMSE 21.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.383% (RMSE 26.31 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.334% (RMSE 31.97 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.086% (RMSE 36.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.262% (RMSE 35.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.784% (RMSE 33.68 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.038% (RMSE 35.16 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.721% (RMSE 37.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.346% (RMSE 37.76 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.729% (RMSE 37.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.502% (RMSE 38.13 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.063% (RMSE 37.71 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.864% (RMSE 36.43 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.571% (RMSE 9.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.525% (RMSE 33.14 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.154% (RMSE 37.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.293% (RMSE 36.87 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.274% (RMSE 36.86 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.022% (RMSE 37.17 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.704% (RMSE 35.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.91% (RMSE 36.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.228% (RMSE 37.03 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.442% (RMSE 36.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.36% (RMSE 36.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.43% (RMSE 37.77 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.451% (RMSE 36.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.937% (RMSE 35.92 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.668% (RMSE 35.97 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.089% (RMSE 32.02 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.995% (RMSE 32.65 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.168% (RMSE 35.69 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.69% (RMSE 21.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.385% (RMSE 26.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.373% (RMSE 32.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.424% (RMSE 36.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.296% (RMSE 35.67 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.94% (RMSE 34.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.467% (RMSE 35.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.152% (RMSE 38.24 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.713% (RMSE 38.17 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.065% (RMSE 38.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.547% (RMSE 38.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.053% (RMSE 38.05 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.948% (RMSE 36.1 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.214% (RMSE 10.12 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.965% (RMSE 33.25 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.381% (RMSE 37.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.005% (RMSE 36.46 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.805% (RMSE 36.6 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.579% (RMSE 36.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.617% (RMSE 35.24 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.664% (RMSE 36.39 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.029% (RMSE 36.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.446% (RMSE 36.35 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.712% (RMSE 36.4 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.39% (RMSE 37.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.515% (RMSE 35.45 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.163% (RMSE 36.24 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.917% (RMSE 30.8 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.416% (RMSE 31.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.124% (RMSE 34.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.107% (RMSE 21.12 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.112% (RMSE 25.88 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.623% (RMSE 31.28 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.417% (RMSE 34.52 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.068% (RMSE 34.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.017% (RMSE 32.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.921% (RMSE 34.43 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.893% (RMSE 36.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.6% (RMSE 36.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.053% (RMSE 36.46 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.4% (RMSE 36.53 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.362% (RMSE 36.21 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.914% (RMSE 34.81 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.537% (RMSE 9.91 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.663% (RMSE 33.11 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.924% (RMSE 35.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.303% (RMSE 35.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.264% (RMSE 35.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.461% (RMSE 35.64 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.661% (RMSE 34.86 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.264% (RMSE 35.86 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.776% (RMSE 36.95 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.148% (RMSE 36.25 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.658% (RMSE 34.67 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.734% (RMSE 35.77 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.789% (RMSE 34.07 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.124% (RMSE 34.58 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.46% (RMSE 31.94 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.459% (RMSE 32.15 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.619% (RMSE 21.27 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.35% (RMSE 25.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.065% (RMSE 28.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.433% (RMSE 31.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.386% (RMSE 30.91 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.549% (RMSE 30.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.782% (RMSE 30.61 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.507% (RMSE 31.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.332% (RMSE 31.76 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.759% (RMSE 31.88 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.63% (RMSE 31.91 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.737% (RMSE 31.88 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.723% (RMSE 31.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.134% (RMSE 14.45 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.033% (RMSE 29.46 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.895% (RMSE 31.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.897% (RMSE 31.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.309% (RMSE 30.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.299% (RMSE 30.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.568% (RMSE 30.87 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.024% (RMSE 30.99 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.336% (RMSE 31.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.682% (RMSE 30.85 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.826% (RMSE 31.87 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.307% (RMSE 31.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.61% (RMSE 31.5 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.186% (RMSE 32.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.004% (RMSE 32.24 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.055% (RMSE 21.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.689% (RMSE 25.98 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.182% (RMSE 28.98 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.527% (RMSE 31.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.393% (RMSE 31.54 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.911% (RMSE 30.58 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.381% (RMSE 31.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.763% (RMSE 32.29 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.521% (RMSE 32.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.638% (RMSE 32.52 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.421% (RMSE 32.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.854% (RMSE 32.47 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.73% (RMSE 31.89 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.655% (RMSE 14.67 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.076% (RMSE 29.9 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.339% (RMSE 31.76 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.968% (RMSE 31.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.552% (RMSE 31.3 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.185% (RMSE 31.38 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.044% (RMSE 31.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.558% (RMSE 31.51 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.583% (RMSE 31.92 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.288% (RMSE 31.35 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.681% (RMSE 32.51 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.321% (RMSE 32.18 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.595% (RMSE 32.12 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.418% (RMSE 32.76 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.065% (RMSE 22.63 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.299% (RMSE 26.83 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.869% (RMSE 31.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.284% (RMSE 34.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.13% (RMSE 34.58 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.347% (RMSE 33.37 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.206% (RMSE 34.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.941% (RMSE 35.37 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.258% (RMSE 35.31 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.234% (RMSE 35.68 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.683% (RMSE 35.78 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.479% (RMSE 35.45 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.972% (RMSE 34.99 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.41% (RMSE 15.64 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.122% (RMSE 32.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.791% (RMSE 35.21 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.479% (RMSE 35.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.191% (RMSE 34.95 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.026% (RMSE 34.79 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.542% (RMSE 33.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.684% (RMSE 34.28 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.454% (RMSE 34.71 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.867% (RMSE 34.05 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.864% (RMSE 35.75 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.12% (RMSE 35.52 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.164% (RMSE 35.51 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.547% (RMSE 35.81 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.622% (RMSE 11.11 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.417% (RMSE 14.77 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.438% (RMSE 21.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.548% (RMSE 21.29 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.343% (RMSE 20.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.97% (RMSE 20.85 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.19% (RMSE 21.71 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.564% (RMSE 21.59 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.074% (RMSE 21.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.121% (RMSE 21.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.69% (RMSE 21.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.155% (RMSE 21.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.832% (RMSE 8.25 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.076% (RMSE 21.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.095% (RMSE 21.43 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.205% (RMSE 21.4 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.067% (RMSE 21.13 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.892% (RMSE 21.18 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.219% (RMSE 21.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.911% (RMSE 21.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.505% (RMSE 21.61 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.299% (RMSE 21.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.549% (RMSE 21.6 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.251% (RMSE 21.69 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.155% (RMSE 21.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.366% (RMSE 23.12 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.104% (RMSE 24.31 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.303% (RMSE 26.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.125% (RMSE 25.87 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.838% (RMSE 25.17 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.59% (RMSE 25.5 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.685% (RMSE 26.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.623% (RMSE 26.5 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.696% (RMSE 26.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.457% (RMSE 26.54 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.215% (RMSE 26.6 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.184% (RMSE 25.86 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.707% (RMSE 9.1 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.103% (RMSE 24.03 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.539% (RMSE 26.02 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.733% (RMSE 25.97 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.19% (RMSE 26.11 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.445% (RMSE 26.3 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.81% (RMSE 25.7 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.552% (RMSE 26.02 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.77% (RMSE 26.22 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.663% (RMSE 26.24 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.71% (RMSE 26.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.704% (RMSE 26.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.132% (RMSE 25.87 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.548% (RMSE 27.25 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.286% (RMSE 31.35 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.167% (RMSE 30.96 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.937% (RMSE 29.71 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.806% (RMSE 31.03 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.629% (RMSE 32.11 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.711% (RMSE 32.1 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.685% (RMSE 32.51 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.762% (RMSE 32.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.256% (RMSE 32.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.391% (RMSE 31.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.434% (RMSE 9.98 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.405% (RMSE 28.47 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.373% (RMSE 31.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.909% (RMSE 31.43 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.304% (RMSE 31.35 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.248% (RMSE 31.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.466% (RMSE 30.68 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.576% (RMSE 31.29 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.972% (RMSE 31.63 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.651% (RMSE 31.28 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.506% (RMSE 31.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.609% (RMSE 32.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.744% (RMSE 31.68 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.435% (RMSE 31.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.41% (RMSE 30.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.781% (RMSE 30.18 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.292% (RMSE 30.29 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.442% (RMSE 35.83 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.873% (RMSE 35.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.499% (RMSE 36.36 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.628% (RMSE 36.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.846% (RMSE 35.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.638% (RMSE 34.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.649% (RMSE 9.83 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.867% (RMSE 31.86 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.909% (RMSE 35.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.021% (RMSE 35.35 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.405% (RMSE 35.09 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.317% (RMSE 35.11 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.253% (RMSE 34.17 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.287% (RMSE 34.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.205% (RMSE 35.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.046% (RMSE 34.78 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.91% (RMSE 35.37 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.056% (RMSE 35.72 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.067% (RMSE 34.97 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.692% (RMSE 35.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.54% (RMSE 31.51 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.399% (RMSE 30.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.159% (RMSE 35.14 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.355% (RMSE 35.29 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.497% (RMSE 35.63 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.164% (RMSE 35.88 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.242% (RMSE 34.75 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.724% (RMSE 34.47 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.197% (RMSE 10.13 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.95% (RMSE 32.05 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.511% (RMSE 35.07 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.329% (RMSE 34.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.628% (RMSE 34.67 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.23% (RMSE 34.37 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.07% (RMSE 33.63 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.91% (RMSE 34.24 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.826% (RMSE 34.83 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.052% (RMSE 34.4 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.663% (RMSE 35.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.974% (RMSE 35.17 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.146% (RMSE 34.58 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.85% (RMSE 34.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.864% (RMSE 30.16 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.903% (RMSE 33.46 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.401% (RMSE 33.76 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.875% (RMSE 33.86 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.997% (RMSE 34.03 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.873% (RMSE 33.07 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.83% (RMSE 32.68 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.199% (RMSE 10.13 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.233% (RMSE 30.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.879% (RMSE 33.47 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.239% (RMSE 33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.728% (RMSE 32.9 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.744% (RMSE 32.7 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.789% (RMSE 32.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.702% (RMSE 32.71 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.267% (RMSE 33.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.789% (RMSE 32.69 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.678% (RMSE 33.51 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.529% (RMSE 33.54 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.612% (RMSE 33.12 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.134% (RMSE 32.62 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.849% (RMSE 35.75 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.571% (RMSE 35.8 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.303% (RMSE 36.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.682% (RMSE 36.15 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.83% (RMSE 35.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.277% (RMSE 30.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.566% (RMSE 9.89 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.533% (RMSE 30.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.679% (RMSE 34.85 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.078% (RMSE 34.59 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.954% (RMSE 34.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.934% (RMSE 34.81 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.435% (RMSE 33.94 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.743% (RMSE 34.84 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.651% (RMSE 35.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.62% (RMSE 35.05 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.931% (RMSE 35.37 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.24% (RMSE 35.31 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.461% (RMSE 34.52 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.719% (RMSE 35.03 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.302% (RMSE 38.1 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.129% (RMSE 38.58 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.065% (RMSE 38.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.262% (RMSE 37.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.187% (RMSE 35.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.116% (RMSE 10.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.046% (RMSE 33.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.946% (RMSE 37.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.093% (RMSE 37.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.901% (RMSE 36.97 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.571% (RMSE 37.09 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.588% (RMSE 35.62 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.58% (RMSE 36.56 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.524% (RMSE 36.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.142% (RMSE 36.48 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.405% (RMSE 36.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.484% (RMSE 37.96 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.196% (RMSE 36.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.554% (RMSE 35.81 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.957% (RMSE 38.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.897% (RMSE 38.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.768% (RMSE 37.66 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.398% (RMSE 35.65 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.298% (RMSE 10.07 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.977% (RMSE 33.64 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.85% (RMSE 37.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.655% (RMSE 37.11 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.251% (RMSE 37.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.012% (RMSE 36.99 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.459% (RMSE 35.64 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.109% (RMSE 36.47 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.571% (RMSE 36.92 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.514% (RMSE 36.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.889% (RMSE 36.11 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.453% (RMSE 37.78 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.545% (RMSE 35.99 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.973% (RMSE 35.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.9% (RMSE 38.54 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.175% (RMSE 38.07 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.914% (RMSE 36.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.314% (RMSE 10.06 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.941% (RMSE 33.85 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.181% (RMSE 37.9 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.793% (RMSE 37.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.034% (RMSE 37 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.377% (RMSE 37.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.798% (RMSE 36.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.484% (RMSE 37.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.776% (RMSE 36.95 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.416% (RMSE 37.59 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.544% (RMSE 36.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.41% (RMSE 38.29 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.139% (RMSE 36.3 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.384% (RMSE 36.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.264% (RMSE 38.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.843% (RMSE 36.61 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.323% (RMSE 10.05 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.094% (RMSE 33.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.62% (RMSE 37.98 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.249% (RMSE 37.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.593% (RMSE 37.1 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.289% (RMSE 37.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.39% (RMSE 36.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.909% (RMSE 36.98 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.642% (RMSE 37.28 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.192% (RMSE 37.73 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.14% (RMSE 36.66 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.995% (RMSE 38.39 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.374% (RMSE 36.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.267% (RMSE 36.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.198% (RMSE 34.76 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.121% (RMSE 10.18 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.226% (RMSE 32.6 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.165% (RMSE 37.2 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.906% (RMSE 37.15 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.08% (RMSE 37.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.404% (RMSE 36.89 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.468% (RMSE 35.45 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.044% (RMSE 36.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.911% (RMSE 36.8 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.329% (RMSE 36.87 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.021% (RMSE 36.27 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.461% (RMSE 37.6 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.707% (RMSE 35.96 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.878% (RMSE 35.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.91% (RMSE 9.66 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.253% (RMSE 31.98 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.755% (RMSE 35.95 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.21% (RMSE 35.69 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.657% (RMSE 35.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.49% (RMSE 35.45 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.381% (RMSE 34.72 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.461% (RMSE 35.46 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.434% (RMSE 35.65 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.717% (RMSE 35.22 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.947% (RMSE 35.74 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.692% (RMSE 36.4 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.693% (RMSE 35.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.178% (RMSE 34.38 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.676% (RMSE 9.12 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.626% (RMSE 9.16 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.199% (RMSE 9.46 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.252% (RMSE 9.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.56% (RMSE 9.89 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.115% (RMSE 10.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.504% (RMSE 9.93 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.142% (RMSE 10.17 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.759% (RMSE 10.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.184% (RMSE 10.77 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.919% (RMSE 9.65 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.547% (RMSE 9.9 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.157% (RMSE 14.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.545% (RMSE 33.34 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.727% (RMSE 33.5 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.176% (RMSE 33.6 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.054% (RMSE 33.63 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.266% (RMSE 33.39 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.043% (RMSE 33.43 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.686% (RMSE 33.7 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.839% (RMSE 33.08 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.93% (RMSE 32.66 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.362% (RMSE 33.57 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.546% (RMSE 32.33 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.245% (RMSE 31.99 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.568% (RMSE 35.81 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.911% (RMSE 36.98 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.585% (RMSE 36.38 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.523% (RMSE 35.44 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.259% (RMSE 36.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.9% (RMSE 36.8 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.146% (RMSE 36.3 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.999% (RMSE 36.09 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.422% (RMSE 37.77 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.115% (RMSE 35.7 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.706% (RMSE 35.6 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.131% (RMSE 37.02 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.261% (RMSE 36.5 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.958% (RMSE 35.55 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.319% (RMSE 36.03 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.295% (RMSE 37.04 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.033% (RMSE 35.72 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.227% (RMSE 35.68 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.432% (RMSE 36.89 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.13% (RMSE 35.14 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.853% (RMSE 35.38 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.557% (RMSE 37.27 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.528% (RMSE 35.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.405% (RMSE 36.53 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.695% (RMSE 36.4 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.481% (RMSE 35.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.291% (RMSE 35.67 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.917% (RMSE 37.86 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.189% (RMSE 35.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.861% (RMSE 35.19 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.505% (RMSE 35.07 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.339% (RMSE 37.23 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.108% (RMSE 35.52 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.264% (RMSE 36.32 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.087% (RMSE 35.71 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.386% (RMSE 37.24 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.205% (RMSE 35.13 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.412% (RMSE 35.09 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.513% (RMSE 35.82 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.242% (RMSE 35.68 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.25% (RMSE 35.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.04% (RMSE 34.21 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.124% (RMSE 36.25 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.789% (RMSE 34.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.013% (RMSE 34.22 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.171% (RMSE 36.49 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.085% (RMSE 36.26 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.571% (RMSE 34.87 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.43% (RMSE 37.42 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.97% (RMSE 35.17 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.231% (RMSE 34.75 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.335% (RMSE 36.52 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.379% (RMSE 35.28 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.655% (RMSE 37.29 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.694% (RMSE 35.41 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.079% (RMSE 35.15 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.766% (RMSE 34.65 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.177% (RMSE 36.24 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.701% (RMSE 34.47 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.906% (RMSE 34.43 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.711% (RMSE 36.4 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.566% (RMSE 35.99 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.181% (RMSE 36.31 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.122% (RMSE 36.25 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.445% (RMSE 36.01 vs. 36.27 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.458% (RMSE 36.01 vs. 36.27 null)
Predictive success by metric is explored:
# Label the pairwise R-squared results: each row of mtxSmallCloud holds the
# indices of the two predictors and the holdout R-squared of that 2-variable
# random forest model.
dfSmallR2Cloud <- mtxSmallCloud %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1=possCloudVars[idx1],
         var2=possCloudVars[idx2],
         rn=row_number()
  )
# Display the twenty strongest variable pairs
dfSmallR2Cloud %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n=20)
## # A tibble: 666 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 cloudcover_low weathercode 0.948
## 2 cloudcover_mid weathercode 0.937
## 3 weathercode vapor_pressure_deficit 0.937
## 4 weathercode soil_temperature_0_to_7cm 0.936
## 5 apparent_temperature weathercode 0.936
## 6 temperature_2m weathercode 0.935
## 7 weathercode soil_temperature_28_to_100cm 0.933
## 8 weathercode soil_temperature_7_to_28cm 0.932
## 9 relativehumidity_2m weathercode 0.931
## 10 dewpoint_2m weathercode 0.931
## 11 weathercode doy 0.929
## 12 et0_fao_evapotranspiration weathercode 0.929
## 13 shortwave_radiation weathercode 0.926
## 14 diffuse_radiation weathercode 0.926
## 15 weathercode soil_temperature_100_to_255cm 0.926
## 16 weathercode month 0.925
## 17 surface_pressure weathercode 0.925
## 18 weathercode soil_moisture_7_to_28cm 0.925
## 19 cloudcover_high weathercode 0.924
## 20 winddirection_100m weathercode 0.923
## # ℹ 646 more rows
# For each variable, summarize the min/mean/max holdout R-squared across all
# 2-variable models that include it, then plot the range per variable (ordered
# by mean R-squared, flipped so variable names read horizontally).
dfSmallR2Cloud %>%
  pivot_longer(cols=c(var1, var2)) %>%
  group_by(value) %>%
  summarize(r2_min=min(r2), r2_mu=mean(r2), r2_max=max(r2)) %>%
  ggplot(aes(x=fct_reorder(value, r2_mu))) +
  coord_flip() +
  geom_point(aes(y=r2_mu)) +
  geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) +
  ylim(NA, 1) +
  # Dashed red line marks the theoretical ceiling of R-squared = 1
  geom_hline(yintercept=1, lty=2, color="red") +
  labs(title="R-squared in every 2-predictor model including self and one other",
       subtitle="Predicting cloud cover",
       y="Range of R-squared (min-mean-max)",
       x=NULL
  )
# Re-rank the pairs with 'weathercode' removed, since it dominates the top of
# the unrestricted list; filtering before sorting yields the identical result.
dfSmallR2Cloud %>%
  filter(var1!="weathercode", var2!="weathercode") %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n=20)
## # A tibble: 630 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 cloudcover_low cloudcover_mid 0.906
## 2 cloudcover_low cloudcover_high 0.834
## 3 cloudcover_low direct_normal_irradiance 0.673
## 4 cloudcover_low diffuse_radiation 0.670
## 5 cloudcover_low soil_moisture_100_to_255cm 0.663
## 6 cloudcover_low soil_moisture_0_to_7cm 0.662
## 7 surface_pressure cloudcover_low 0.661
## 8 cloudcover_low soil_temperature_28_to_100cm 0.661
## 9 cloudcover_low soil_temperature_100_to_255cm 0.659
## 10 precipitation cloudcover_low 0.656
## 11 cloudcover_low direct_radiation 0.655
## 12 cloudcover_low shortwave_radiation 0.654
## 13 cloudcover_low soil_temperature_7_to_28cm 0.652
## 14 cloudcover_low month 0.652
## 15 cloudcover_low et0_fao_evapotranspiration 0.652
## 16 temperature_2m cloudcover_low 0.652
## 17 relativehumidity_2m cloudcover_low 0.651
## 18 cloudcover_low soil_temperature_0_to_7cm 0.651
## 19 cloudcover_low vapor_pressure_deficit 0.651
## 20 rain cloudcover_low 0.651
## # ℹ 610 more rows
# Same per-variable min/mean/max R-squared summary and range plot as above,
# but excluding any pair that involves 'weathercode'.
dfSmallR2Cloud %>%
  filter(var1!="weathercode", var2!="weathercode") %>%
  pivot_longer(cols=c(var1, var2)) %>%
  group_by(value) %>%
  summarize(r2_min=min(r2), r2_mu=mean(r2), r2_max=max(r2)) %>%
  ggplot(aes(x=fct_reorder(value, r2_mu))) +
  coord_flip() +
  geom_point(aes(y=r2_mu)) +
  geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) +
  ylim(NA, 1) +
  # Dashed red line marks the theoretical ceiling of R-squared = 1
  geom_hline(yintercept=1, lty=2, color="red") +
  labs(title="R-squared in every 2-predictor model including self and one other",
       subtitle="Predicting cloud cover (excluding variable paired with 'weathercode')",
       y="Range of R-squared (min-mean-max)",
       x=NULL
  )
Select combinations are explored using the full training dataset, with mtry=3:
# Candidate predictors for the full-data 3-variable models: weathercode plus
# the three cloud cover subtype columns.
possLargeVars <- c("weathercode",
                   paste0("cloudcover_", c("low", "mid", "high"))
)
possLargeVars
## [1] "weathercode" "cloudcover_low" "cloudcover_mid" "cloudcover_high"
# Fit a random forest for every 3-variable combination of possLargeVars
# (choose(4, 3) = 4 models) on the FULL training data, recording the holdout
# R-squared of each. Results accumulate as rows (idx1, idx2, idx3, r2);
# growing a zero-row matrix via rbind() is acceptable here because only four
# combinations exist.
mtxLargeCloud <- matrix(nrow=0, ncol=4)
# Nested loops enumerate strictly increasing index triples idx1 < idx2 < idx3
for(idx1 in 1:(length(possLargeVars)-2)) {
for(idx2 in (idx1+1):(length(possLargeVars)-1)) {
for(idx3 in (idx2+1):(length(possLargeVars))) {
# runFullRF is a project helper defined earlier in this analysis; only the
# holdout R-squared from its accuracy list ("rfAcc") is retained.
# mtry=3 means all three predictors are candidates at every split.
r2LargeCloud <- runFullRF(dfTrain=dfTrainCloud[,],
yVar="cloudcover",
xVars=possLargeVars[c(idx1, idx2, idx3)],
dfTest=dfTestCloud,
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
mtry=3,
makePlots=FALSE,
returnData=TRUE
)[["rfAcc"]][["r2"]]
mtxLargeCloud <- rbind(mtxLargeCloud, c(idx1, idx2, idx3, r2LargeCloud))
}
}
}
## Growing trees.. Progress: 49%. Estimated remaining time: 31 seconds.
## Growing trees.. Progress: 98%. Estimated remaining time: 1 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.104% (RMSE 5 vs. 36.27 null)
## Growing trees.. Progress: 52%. Estimated remaining time: 28 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 97.085% (RMSE 6.19 vs. 36.27 null)
## Growing trees.. Progress: 43%. Estimated remaining time: 41 seconds.
## Growing trees.. Progress: 84%. Estimated remaining time: 11 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.218% (RMSE 7.93 vs. 36.27 null)
## Growing trees.. Progress: 43%. Estimated remaining time: 41 seconds.
## Growing trees.. Progress: 86%. Estimated remaining time: 9 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.998% (RMSE 0.17 vs. 36.27 null)
# Label the 3-variable results with human-readable variable names
dfLargeR2Cloud <- mtxLargeCloud %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "idx3", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1=possLargeVars[idx1],
         var2=possLargeVars[idx2],
         var3=possLargeVars[idx3],
         rn=row_number()
  )
# Show all four models, best first
dfLargeR2Cloud %>%
  arrange(desc(r2)) %>%
  select(var1, var2, var3, r2) %>%
  print(n=20)
## # A tibble: 4 × 4
## var1 var2 var3 r2
## <chr> <chr> <chr> <dbl>
## 1 cloudcover_low cloudcover_mid cloudcover_high 1.00
## 2 weathercode cloudcover_low cloudcover_mid 0.981
## 3 weathercode cloudcover_low cloudcover_high 0.971
## 4 weathercode cloudcover_mid cloudcover_high 0.952
The three cloud cover subtypes, in combination, have almost perfect predictive power on overall cloud cover
A linear model is run for comparison:
# Fit a saturated linear model (all main effects plus every interaction) of
# overall cloud cover on the three subtype covers, using pre-2022 training
# rows only. Short column aliases (c/l/m/h) keep the formula compact, and
# data=. pipes the selected columns in as lm()'s data argument.
lmMiniCloud <- allCity %>%
filter(tt=="train", year<2022) %>%
select(c=cloudcover, l=cloudcover_low, m=cloudcover_mid, h=cloudcover_high) %>%
lm(c~l*m*h, data=.)
summary(lmMiniCloud)
##
## Call:
## lm(formula = c ~ l * m * h, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.6112 -0.8934 0.0381 0.1074 19.3437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.074e-01 7.397e-03 -14.52 <2e-16 ***
## l 9.280e-01 2.442e-04 3799.73 <2e-16 ***
## m 6.507e-01 3.641e-04 1787.35 <2e-16 ***
## h 3.093e-01 1.734e-04 1783.49 <2e-16 ***
## l:m -4.984e-03 6.316e-06 -789.05 <2e-16 ***
## l:h -1.401e-03 5.068e-06 -276.49 <2e-16 ***
## m:h -1.996e-04 5.159e-06 -38.69 <2e-16 ***
## l:m:h -2.158e-05 9.538e-08 -226.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.927 on 368102 degrees of freedom
## Multiple R-squared: 0.9935, Adjusted R-squared: 0.9935
## F-statistic: 8.006e+06 on 7 and 368102 DF, p-value: < 2.2e-16
# Score the linear model on the 2022 holdout rows, then summarize accuracy
# within bins of actual cloud cover (rounded to the nearest 5).
# NOTE on the pipe: because `.` appears as a top-level argument (pred=.),
# magrittr does NOT also insert the piped predictions as mutate()'s first
# argument -- the data frame is the select(...) call and `pred` receives the
# prediction vector. Unusual but intentional.
ggMiniCloud <- predict(lmMiniCloud,
newdata=allCity %>%
filter(tt=="test", year==2022) %>%
select(c=cloudcover, l=cloudcover_low, m=cloudcover_mid, h=cloudcover_high)
) %>%
mutate(select(allCity %>% filter(tt=="test", year==2022), cloudcover),
pred=.,
err=pred-cloudcover,
err2=err**2,
rnd5=round(cloudcover/5)*5
) %>%
group_by(rnd5) %>%
summarize(n=n(), across(.cols=where(is.numeric), .fns=mean))
ggMiniCloud %>% print(n=25)
## # A tibble: 21 × 6
## rnd5 n cloudcover pred err err2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 4697 0.208 0.0977 -0.111 0.0255
## 2 5 820 4.68 4.78 0.0924 0.119
## 3 10 590 9.78 10.1 0.296 0.214
## 4 15 483 14.9 15.4 0.440 0.393
## 5 20 437 20.2 20.8 0.619 0.672
## 6 25 429 25.1 25.8 0.712 0.846
## 7 30 1073 30.0 30.7 0.754 0.791
## 8 35 354 35.0 35.8 0.847 1.53
## 9 40 300 40.2 40.9 0.766 2.15
## 10 45 236 44.9 45.3 0.457 2.94
## 11 50 219 50.0 50.3 0.254 4.63
## 12 55 195 54.9 55.3 0.408 6.77
## 13 60 250 59.9 61.3 1.37 13.4
## 14 65 184 64.9 64.9 0.0396 11.5
## 15 70 134 69.8 69.0 -0.810 15.0
## 16 75 148 75.0 74.3 -0.681 19.3
## 17 80 156 79.9 79.3 -0.654 24.4
## 18 85 146 85.0 84.7 -0.227 32.4
## 19 90 710 90.0 91.9 1.88 16.7
## 20 95 155 94.7 90.8 -3.91 43.8
## 21 100 1413 99.9 101. 0.984 37.4
# Plot actual vs. predicted mean cloud cover by 5-point bin; the named-vector
# lookup inside aes() maps the pivoted column names to legend labels.
ggMiniCloud %>%
select(rnd5, cloudcover, pred) %>%
pivot_longer(cols=-c(rnd5)) %>%
ggplot(aes(x=rnd5, y=value)) +
geom_line(aes(group=name,
color=c("pred"="Predicted Mean", "cloudcover"="Actual Mean")[name]
)
) +
labs(title="Actual vs. Predicted Cloud Cover Using Linear Model on Holdout Data",
x="Actual cloud cover (rounded to nearest 5)",
y="Average cloud cover for metric"
) +
scale_color_discrete("Metric") +
# Dashed y=x reference line: points on it are perfectly predicted
geom_abline(slope=1, intercept=0, lty=2)
The linear model generally makes strong predictions, though with generally lower accuracy on cloudier days. Distribution of errors is explored:
# Histogram of linear-model holdout errors, faceted by cloudiness band.
# NOTE on the pipe: since `.` appears as a top-level argument (pred=.),
# magrittr does not insert the predictions as mutate()'s first argument; the
# data is the select(...) data frame and `pred` is the prediction vector.
predict(lmMiniCloud,
newdata=allCity %>%
filter(tt=="test", year==2022) %>%
select(c=cloudcover, l=cloudcover_low, m=cloudcover_mid, h=cloudcover_high)
) %>%
mutate(select(allCity %>% filter(tt=="test", year==2022), cloudcover),
pred=.,
err=pred-cloudcover,
err2=err**2,
rnd5=round(cloudcover/5)*5,
rndCat=case_when(cloudcover<10~"1) clear (<10)",
cloudcover<50~"2) partly (10-50)",
cloudcover<90~"3) mostly (50-90)",
TRUE~"4) cloudy (>90)"
)
) %>%
ggplot(aes(x=err)) +
geom_histogram(fill="lightblue") +
labs(title="Errors in linear model cloud cover prediction by amount of clouds",
x="Error (Predicted minus Actual)",
y="# Observations"
) +
facet_wrap(~rndCat, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Predictions for the random forest model are also explored:
# Re-fit the random forest using only the cloudcover_* subtype predictors
# (str_detect selects the low/mid/high columns from varsTrain), and keep the
# holdout prediction tibble ("tstPred") for error analysis.
rfSubCloudPred <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="cloudcover",
xVars=c(varsTrain[str_detect(varsTrain, pattern="cloudcover_")]),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
mtry=3,
rndTo=-1L,
refXY=TRUE,
returnData=TRUE
)[["tstPred"]]
## Growing trees.. Progress: 46%. Estimated remaining time: 36 seconds.
## Growing trees.. Progress: 98%. Estimated remaining time: 1 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.998% (RMSE 0.17 vs. 36.27 null)
## `geom_smooth()` using formula = 'y ~ x'
# Inspect the holdout prediction tibble returned by runFullRF
rfSubCloudPred
## # A tibble: 13,129 × 84
## src time date hour temperature_2m relativehumidity_2m
## <chr> <dttm> <date> <int> <dbl> <int>
## 1 NYC 2022-01-01 00:00:00 2022-01-01 0 9.2 97
## 2 NYC 2022-01-01 01:00:00 2022-01-01 1 8.9 98
## 3 NYC 2022-01-01 10:00:00 2022-01-01 10 9.8 98
## 4 NYC 2022-01-01 11:00:00 2022-01-01 11 10.2 99
## 5 NYC 2022-01-02 00:00:00 2022-01-02 0 9.7 99
## 6 NYC 2022-01-02 02:00:00 2022-01-02 2 9.7 97
## 7 NYC 2022-01-02 03:00:00 2022-01-02 3 9.7 100
## 8 NYC 2022-01-02 05:00:00 2022-01-02 5 9.7 99
## 9 NYC 2022-01-02 12:00:00 2022-01-02 12 12.5 92
## 10 NYC 2022-01-02 16:00:00 2022-01-02 16 12.4 90
## # ℹ 13,119 more rows
## # ℹ 78 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
Distribution of errors from the random forest model is explored:
# Histogram of RANDOM FOREST holdout errors, faceted by cloudiness band.
# Fix: the plot title previously said "linear model" -- a copy-paste from the
# lmMiniCloud chunk above -- but these errors come from rfSubCloudPred.
rfSubCloudPred %>%
  mutate(err=pred-cloudcover,          # signed prediction error
         err2=err**2,                  # squared error
         rnd5=round(cloudcover/5)*5,   # cloud cover rounded to nearest 5
         rndCat=case_when(cloudcover<10~"1) clear (<10)",
                          cloudcover<50~"2) partly (10-50)",
                          cloudcover<90~"3) mostly (50-90)",
                          TRUE~"4) cloudy (>90)"
         )
  ) %>%
  ggplot(aes(x=err)) +
  geom_histogram(fill="lightblue") +
  labs(title="Errors in random forest cloud cover prediction by amount of clouds",
       x="Error (Predicted minus Actual)",
       y="# Observations"
  ) +
  facet_wrap(~rndCat, scales="free")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
There are essentially no prediction errors at any level of overall cloudiness
The impact of varying mtry is also explored:
# Compare holdout R-squared across mtry settings (number of candidate
# variables per split) for the 3-predictor cloud cover forest.
# vapply() replaces sapply() so the return type is guaranteed to be a plain
# numeric vector (one R-squared per mtry value); output is unchanged.
vapply(1:3, FUN=function(mt) {
  runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
            yVar="cloudcover",
            xVars=c(varsTrain[str_detect(varsTrain, pattern="cloudcover_")]),
            dfTest=allCity %>% filter(tt=="test", year==2022),
            useLabel=keyLabel,
            useSub=stringr::str_to_sentence(keyLabel),
            isContVar=TRUE,
            mtry=mt,
            rndTo=-1L,
            refXY=TRUE,
            makePlots=FALSE,
            returnData=TRUE
  )[["rfAcc"]][["r2"]]
},
FUN.VALUE=numeric(1)
)
## Growing trees.. Progress: 98%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.115% (RMSE 3.41 vs. 36.27 null)
## Growing trees.. Progress: 59%. Estimated remaining time: 21 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.998% (RMSE 0.15 vs. 36.27 null)
## Growing trees.. Progress: 35%. Estimated remaining time: 58 seconds.
## Growing trees.. Progress: 70%. Estimated remaining time: 26 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.998% (RMSE 0.17 vs. 36.27 null)
## [1] 0.9911463 0.9999822 0.9999787
With mtry=1 (single variable per tree), R-squared on the test data is slightly over 99%. With mtry=2 or mtry=3, R-squared on the test data is almost exactly 100%
A model is run to predict rain, at first allowing precipitation and snowfall as predictors:
# Random forest predicting rain from all other training variables. The regex
# ^rain$ drops only the target column itself, so highly associated predictors
# such as precipitation and snowfall remain available to the model.
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfRainFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="rain",
xVars=c(varsTrain[!str_detect(varsTrain, "^rain$")]),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
rndTo=-1L,
refXY=TRUE,
returnData=TRUE
)
## Growing trees.. Progress: 28%. Estimated remaining time: 1 minute, 21 seconds.
## Growing trees.. Progress: 58%. Estimated remaining time: 44 seconds.
## Growing trees.. Progress: 87%. Estimated remaining time: 13 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.386% (RMSE 0.16 vs. 0.6 null)
## `geom_smooth()` using formula = 'y ~ x'
The model is effective at predicting rain, primarily by leveraging the highly associated predictors precipitation and weather code. The model generally under-predicts high rainfall observations
A similar process is run using the linear model:
# Eliminate diffuse radiation due to rank-deficiency
# Linear model of rain on every remaining training variable (rain ~ .).
# The data expression is left inline so summary() echoes it in the Call.
lmRainFull <- lm(rain ~ .,
data=allCity %>%
filter(tt=="train", year<2022) %>%
select(all_of(varsTrain)) %>%
select(-diffuse_radiation)
)
summary(lmRainFull)
##
## Call:
## lm(formula = rain ~ ., data = allCity %>% filter(tt == "train",
## year < 2022) %>% select(all_of(varsTrain)) %>% select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.006873 -0.000317 0.000062 0.000344 0.095935
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.313e-02 4.414e-03 16.567 < 2e-16 ***
## hour -1.486e-06 1.159e-06 -1.282 0.199932
## temperature_2m -2.797e-04 1.101e-05 -25.397 < 2e-16 ***
## relativehumidity_2m -1.799e-05 1.105e-06 -16.280 < 2e-16 ***
## dewpoint_2m 2.025e-05 3.676e-06 5.508 3.63e-08 ***
## apparent_temperature 1.623e-04 9.273e-06 17.498 < 2e-16 ***
## pressure_msl -1.817e-05 1.645e-06 -11.047 < 2e-16 ***
## surface_pressure 7.054e-06 7.325e-07 9.630 < 2e-16 ***
## precipitation 9.991e-01 1.545e-05 64674.862 < 2e-16 ***
## snowfall -1.428e+00 1.367e-04 -10451.677 < 2e-16 ***
## cloudcover 1.009e-05 7.635e-07 13.217 < 2e-16 ***
## cloudcover_low -8.663e-06 5.741e-07 -15.091 < 2e-16 ***
## cloudcover_mid -5.018e-06 4.421e-07 -11.351 < 2e-16 ***
## cloudcover_high -5.233e-06 2.753e-07 -19.013 < 2e-16 ***
## shortwave_radiation 2.791e-08 1.715e-07 0.163 0.870683
## direct_radiation -1.606e-07 1.831e-07 -0.877 0.380283
## direct_normal_irradiance 1.079e-07 6.804e-08 1.585 0.112955
## windspeed_10m 4.659e-05 4.417e-06 10.548 < 2e-16 ***
## windspeed_100m -1.024e-05 2.782e-06 -3.681 0.000232 ***
## winddirection_10m -7.432e-08 1.125e-07 -0.661 0.508879
## winddirection_100m -1.548e-07 1.135e-07 -1.365 0.172369
## windgusts_10m -1.217e-05 1.496e-06 -8.139 4.01e-16 ***
## et0_fao_evapotranspiration -7.852e-04 1.987e-04 -3.951 7.77e-05 ***
## weathercode 6.502e-05 5.875e-07 110.678 < 2e-16 ***
## vapor_pressure_deficit 5.257e-04 2.153e-05 24.415 < 2e-16 ***
## soil_temperature_0_to_7cm 5.228e-06 3.289e-06 1.590 0.111923
## soil_temperature_7_to_28cm -5.662e-06 5.539e-06 -1.022 0.306660
## soil_temperature_28_to_100cm -5.370e-06 6.121e-06 -0.877 0.380292
## soil_temperature_100_to_255cm 2.837e-07 3.154e-06 0.090 0.928331
## soil_moisture_0_to_7cm -2.339e-03 1.891e-04 -12.370 < 2e-16 ***
## soil_moisture_7_to_28cm 1.420e-03 2.696e-04 5.268 1.38e-07 ***
## soil_moisture_28_to_100cm 1.086e-04 2.032e-04 0.535 0.592908
## soil_moisture_100_to_255cm 2.417e-04 1.998e-04 1.210 0.226269
## year -2.917e-05 2.129e-06 -13.700 < 2e-16 ***
## doy -3.149e-07 8.502e-08 -3.704 0.000212 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004157 on 368075 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 1.808e+08 on 34 and 368075 DF, p-value: < 2.2e-16
# Holdout accuracy of the full linear model on 2022 test rows:
# model MSE, null-model MSE, R-squared, and RMSE.
allCity %>%
  filter(tt=="test", year==2022) %>%
  mutate(pred=predict(lmRainFull, newdata=.)) %>%
  summarize(meModel=mean((pred-rain)^2),
            meBase=mean((rain-mean(rain))^2),
            r2=1-meModel/meBase,
            rmse=sqrt(meModel)
  )
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 0.00000172 0.354 1.00 0.00131
# Rank the full-model coefficients by the magnitude of their t-statistics;
# coef(summary(x)) returns the same coefficient matrix as summary(x)$coefficients.
coef(summary(lmRainFull)) %>%
  as.data.frame() %>%
  rownames_to_column("Variable") %>%
  tibble::as_tibble() %>%
  arrange(desc(abs(`t value`)))
## # A tibble: 35 × 5
## Variable Estimate `Std. Error` `t value` `Pr(>|t|)`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 precipitation 0.999 0.0000154 64675. 0
## 2 snowfall -1.43 0.000137 -10452. 0
## 3 weathercode 0.0000650 0.000000588 111. 0
## 4 temperature_2m -0.000280 0.0000110 -25.4 3.65e-142
## 5 vapor_pressure_deficit 0.000526 0.0000215 24.4 1.50e-131
## 6 cloudcover_high -0.00000523 0.000000275 -19.0 1.46e- 80
## 7 apparent_temperature 0.000162 0.00000927 17.5 1.59e- 68
## 8 (Intercept) 0.0731 0.00441 16.6 1.26e- 61
## 9 relativehumidity_2m -0.0000180 0.00000110 -16.3 1.45e- 59
## 10 cloudcover_low -0.00000866 0.000000574 -15.1 1.92e- 51
## # ℹ 25 more rows
The linear model has very strong explanatory and predictive power. Rain (mm) appears defined in the raw data as precipitation (mm) minus snowfall (cm) divided by 0.7 (i.e., rain ≈ precipitation − snowfall/0.7, consistent with the fitted snowfall coefficient of about −1.43 ≈ −1/0.7):
# Eliminate diffuse radiation due to rank-deficiency
# Minimal linear model: rain explained by precipitation and snowfall alone,
# to test whether rain is a deterministic function of those two columns.
# The data expression is left inline so summary() echoes it in the Call.
lmRainTwo <- lm(rain ~ precipitation + snowfall,
data=allCity %>%
filter(tt=="train", year<2022) %>%
select(all_of(varsTrain)) %>%
select(-diffuse_radiation)
)
summary(lmRainTwo)
##
## Call:
## lm(formula = rain ~ precipitation + snowfall, data = allCity %>%
## filter(tt == "train", year < 2022) %>% select(all_of(varsTrain)) %>%
## select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.019490 -0.000178 -0.000178 -0.000178 0.099488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.777e-04 7.136e-06 24.9 <2e-16 ***
## precipitation 9.999e-01 1.307e-05 76484.8 <2e-16 ***
## snowfall -1.424e+00 1.343e-04 -10602.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004261 on 368107 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 2.925e+09 on 2 and 368107 DF, p-value: < 2.2e-16
# Holdout accuracy of the two-predictor linear model on 2022 test rows:
# model MSE, null-model MSE, R-squared, and RMSE.
allCity %>%
  filter(tt=="test", year==2022) %>%
  mutate(pred=predict(lmRainTwo, newdata=.)) %>%
  summarize(meModel=mean((pred-rain)^2),
            meBase=mean((rain-mean(rain))^2),
            r2=1-meModel/meBase,
            rmse=sqrt(meModel)
  )
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 0.000000833 0.354 1.00 0.000913
# Rank the two-predictor model's coefficients by absolute t-statistic;
# coef(summary(x)) returns the same coefficient matrix as summary(x)$coefficients.
coef(summary(lmRainTwo)) %>%
  as.data.frame() %>%
  rownames_to_column("Variable") %>%
  tibble::as_tibble() %>%
  arrange(desc(abs(`t value`)))
## # A tibble: 3 × 5
## Variable Estimate `Std. Error` `t value` `Pr(>|t|)`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 precipitation 1.00 0.0000131 76485. 0
## 2 snowfall -1.42 0.000134 -10603. 0
## 3 (Intercept) 0.000178 0.00000714 24.9 8.99e-137
The random forest model is re-run to predict rain, using only precipitation and snowfall as predictors:
# Random forest predicting rain from only precipitation and snowfall;
# mtry=2 makes both predictors candidates at every split. Only the holdout
# prediction tibble ("tstPred") is retained.
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfRainTwo <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="rain",
xVars=c("precipitation", "snowfall"),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
rndTo=-1L,
mtry=2,
refXY=TRUE,
returnData=TRUE
)[["tstPred"]]
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.996% (RMSE 0 vs. 0.6 null)
## `geom_smooth()` using formula = 'y ~ x'
The random forest model is similarly effective at predicting rain using precipitation and snowfall
All combinations of two variables are explored for predicting rain on a smaller training dataset:
# Train and test data created previously (dfTrainCloud and dfTestCloud)
# Variables to explore
# Candidate predictors: every training variable whose name does not contain
# "rain" (this drops the target itself), plus derived month and time-of-day.
possRainVars <- c(varsTrain[!str_detect(varsTrain, "rain")], "month", "tod")
# Subsets to use
# Fixed seed so the 5,000-row training subsample is reproducible
set.seed(24081818)
idxSmallRain <- sample(1:nrow(dfTrainCloud), 5000, replace=FALSE)
# Accumulate one row per pair: (idx1, idx2, holdout R-squared)
mtxSmallRain <- matrix(nrow=0, ncol=3)
# Nested loops enumerate strictly increasing index pairs idx1 < idx2
for(idx1 in 1:(length(possRainVars)-1)) {
for(idx2 in (idx1+1):length(possRainVars)) {
# runFullRF is a project helper; only the holdout R-squared is retained
r2SmallRain <- runFullRF(dfTrain=dfTrainCloud[idxSmallRain,],
yVar="rain",
xVars=possRainVars[c(idx1, idx2)],
dfTest=dfTestCloud,
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
mtry=2,
makePlots=FALSE,
returnData=TRUE
)[["rfAcc"]][["r2"]]
mtxSmallRain <- rbind(mtxSmallRain, c(idx1, idx2, r2SmallRain))
}
}
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.708% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.364% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.644% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.527% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.8% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.14% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.315% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.238% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.015% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.062% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.62% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.133% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.818% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.185% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.803% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.037% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -45.453% (RMSE 0.72 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.942% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.651% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.902% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.551% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.19% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.312% (RMSE 0.22 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.804% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.522% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.274% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.98% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.79% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.428% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.342% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.33% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.011% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.574% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.104% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.375% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.238% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.208% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.125% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.098% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.15% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.43% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.975% (RMSE 0.13 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.921% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.4% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.092% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.524% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.772% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.088% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.517% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.366% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.742% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.821% (RMSE 0.68 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.394% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.131% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.84% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.915% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.407% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.987% (RMSE 0.25 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.187% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.972% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.826% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.226% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.239% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.906% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.736% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.9% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.198% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.943% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.016% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.189% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.242% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.323% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.372% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.186% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.173% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.443% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.307% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.599% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.027% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.808% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.286% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.044% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.833% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.791% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.448% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.832% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.789% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.799% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.871% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.185% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.77% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.678% (RMSE 0.2 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.407% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.922% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.738% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.599% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.665% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.126% (RMSE 0.55 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.29% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.634% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.639% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.165% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.71% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.893% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.044% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.063% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.414% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.088% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.037% (RMSE 0.13 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.79% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.708% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.024% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.345% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -33.817% (RMSE 0.69 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.344% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.161% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.599% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.102% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.362% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.238% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.514% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.183% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.826% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.657% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.715% (RMSE 0.22 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.399% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.817% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.993% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.805% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.859% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.722% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.108% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.589% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.569% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.725% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.537% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.506% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.48% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.449% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.126% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.156% (RMSE 0.13 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.466% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.861% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.051% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.405% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -36.49% (RMSE 0.7 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.977% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.457% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.062% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.544% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -31.802% (RMSE 0.68 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.301% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.403% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.426% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.78% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.019% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.661% (RMSE 0.23 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.346% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.752% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.89% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.526% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.745% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.334% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.451% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.14% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.716% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.952% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.259% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -29.723% (RMSE 0.68 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.958% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.639% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.148% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.518% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.274% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.449% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.771% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.258% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.174% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.566% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.01% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.585% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -36.052% (RMSE 0.69 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.431% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.672% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.394% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.781% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.309% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.167% (RMSE 0.22 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.549% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.129% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.978% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.526% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.96% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.655% (RMSE 0.56 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.396% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.061% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.684% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.571% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.848% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.935% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.847% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.183% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.295% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.492% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.311% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.759% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.675% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.074% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.328% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.684% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.167% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -31.574% (RMSE 0.68 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.041% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.254% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.82% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.286% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.879% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.106% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.934% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.516% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.27% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.794% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.981% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.732% (RMSE 0.56 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.666% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.379% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.892% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.327% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.89% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.27% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.231% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.739% (RMSE 0.12 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.22% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.78% (RMSE 0.15 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.591% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.082% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.849% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.663% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.641% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.564% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.31% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.132% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.048% (RMSE 0.15 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.211% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.311% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.628% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.838% (RMSE 0.12 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.193% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.261% (RMSE 0.13 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.771% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.33% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.739% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.586% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.672% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.94% (RMSE 0.15 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.798% (RMSE 0.15 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.524% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.681% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.995% (RMSE 0.13 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.749% (RMSE 0.14 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.102% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.927% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.167% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.709% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.91% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.015% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.774% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.255% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -35.021% (RMSE 0.69 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.885% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.058% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.235% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.89% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.082% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.162% (RMSE 0.2 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.042% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.738% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.233% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.759% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.056% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.861% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.273% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.026% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.525% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.032% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.555% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.278% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.023% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.194% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.207% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.151% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.624% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.509% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.748% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.045% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.414% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.501% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.66% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.694% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.881% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.113% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.347% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.765% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.484% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.305% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.325% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.317% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.556% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.57% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.04% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.196% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.656% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.947% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.516% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.378% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.365% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.944% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.254% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.26% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.001% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.588% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -29.465% (RMSE 0.68 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.268% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.283% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.313% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.633% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.452% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.321% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.292% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.935% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.664% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.652% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.11% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.543% (RMSE 0.55 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.02% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.747% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.317% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.022% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.64% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.655% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.675% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.036% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.628% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.867% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.901% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.045% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.326% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.851% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.478% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.335% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.372% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.372% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.702% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.003% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.221% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.726% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.002% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -56.812% (RMSE 0.75 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.517% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.983% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.037% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.347% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.863% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.681% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.205% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.326% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.862% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.745% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.577% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.088% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -35.675% (RMSE 0.69 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.836% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.307% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.54% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.295% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.498% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.83% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.623% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.216% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.027% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.408% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -34.21% (RMSE 0.69 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.332% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.948% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.122% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.149% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.175% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.482% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.99% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.615% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.577% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.123% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.544% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -39.604% (RMSE 0.7 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.294% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.701% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.565% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.206% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.214% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.898% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.94% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.474% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.946% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.021% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.088% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.802% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.137% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.868% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.956% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.918% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.208% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.644% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.329% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.378% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.515% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -38.13% (RMSE 0.7 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.453% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.524% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.165% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.656% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.258% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.748% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.006% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.463% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.892% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.735% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.204% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.084% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.278% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.049% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.398% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.334% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.547% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.402% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.671% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -40.408% (RMSE 0.71 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.773% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.251% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.059% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.873% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.135% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.931% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.931% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.117% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.976% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.042% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.221% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.119% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.371% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.54% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.609% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.267% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.766% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.849% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.876% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -40.608% (RMSE 0.71 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.277% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.319% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.651% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.016% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.533% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.918% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.547% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.608% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.08% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.426% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.57% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.663% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.224% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.071% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.241% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.62% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.198% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.357% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.681% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.119% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -49.33% (RMSE 0.73 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -47.749% (RMSE 0.72 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.401% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -41.853% (RMSE 0.71 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.311% (RMSE 0.24 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.741% (RMSE 0.68 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -36.112% (RMSE 0.69 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.547% (RMSE 0.68 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.161% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.933% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.136% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.815% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.567% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -42.857% (RMSE 0.71 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -43.301% (RMSE 0.71 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -47.053% (RMSE 0.72 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -40.644% (RMSE 0.71 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.408% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.102% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.942% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.669% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.261% (RMSE 0.24 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.196% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.03% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.578% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.625% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.179% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.653% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.926% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.119% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.589% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.943% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.878% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.292% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.664% (RMSE 0.66 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.579% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.605% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.39% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.519% (RMSE 0.23 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.04% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.27% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.966% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.675% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.847% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.838% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.636% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.903% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.034% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.026% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.164% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.341% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.195% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.698% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.157% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.449% (RMSE 0.22 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.315% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.657% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.057% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.331% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.712% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.398% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.795% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.446% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.251% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.063% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.173% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.811% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.81% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.442% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.023% (RMSE 0.22 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.746% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.173% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.016% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.312% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.152% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.399% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.208% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.484% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.442% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.659% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.777% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.176% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.664% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.603% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.177% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.705% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.456% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.806% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.474% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.472% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.948% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.699% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.91% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.437% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.822% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.805% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.291% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.684% (RMSE 0.2 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.004% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.526% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.08% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.94% (RMSE 0.2 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.682% (RMSE 0.19 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.134% (RMSE 0.23 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.99% (RMSE 0.24 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.626% (RMSE 0.23 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.119% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.115% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.417% (RMSE 0.2 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.606% (RMSE 0.21 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.885% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.636% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.918% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.148% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.989% (RMSE 0.55 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.883% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.67% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.727% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.253% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.88% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.735% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.881% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.427% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.531% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.677% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.241% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.862% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.678% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.025% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.166% (RMSE 0.65 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.656% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.45% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.541% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.924% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.197% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.496% (RMSE 0.58 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.568% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.988% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.457% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.771% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.285% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.895% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.882% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.326% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.06% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.223% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.521% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.941% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.584% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.621% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.372% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.206% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.126% (RMSE 0.56 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.249% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.529% (RMSE 0.67 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.629% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.391% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.366% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.132% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.104% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.9% (RMSE 0.53 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.785% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.492% (RMSE 0.55 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.872% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.157% (RMSE 0.57 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.399% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.882% (RMSE 0.59 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.54% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.355% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.886% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.123% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.996% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.486% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.089% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.524% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.976% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.478% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.69% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.083% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.142% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.54% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.392% (RMSE 0.62 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.469% (RMSE 0.64 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.912% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.144% (RMSE 0.6 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.691% (RMSE 0.61 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.73% (RMSE 0.63 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.602% (RMSE 0.6 vs. 0.6 null)
Predictive success by metric is explored:
# Convert the pairwise R-squared matrix for rain into a labeled tibble,
# mapping each index pair back to its variable names
dfSmallR2Rain <- mtxSmallRain %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1 = possRainVars[idx1],
         var2 = possRainVars[idx2],
         rn = row_number())
# Show the 20 strongest two-predictor combinations
dfSmallR2Rain %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 666 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 precipitation weathercode 0.958
## 2 precipitation snowfall 0.957
## 3 precipitation soil_temperature_0_to_7cm 0.953
## 4 apparent_temperature precipitation 0.952
## 5 dewpoint_2m precipitation 0.950
## 6 precipitation month 0.950
## 7 temperature_2m precipitation 0.950
## 8 precipitation shortwave_radiation 0.948
## 9 precipitation soil_temperature_7_to_28cm 0.948
## 10 precipitation tod 0.947
## 11 precipitation soil_temperature_100_to_255cm 0.947
## 12 precipitation doy 0.947
## 13 precipitation soil_moisture_7_to_28cm 0.947
## 14 precipitation direct_radiation 0.947
## 15 precipitation direct_normal_irradiance 0.946
## 16 precipitation et0_fao_evapotranspiration 0.946
## 17 precipitation cloudcover_mid 0.946
## 18 precipitation soil_moisture_0_to_7cm 0.946
## 19 precipitation diffuse_radiation 0.946
## 20 precipitation year 0.945
## # ℹ 646 more rows
# Summarize each variable's R-squared range (min/mean/max) across all of its
# pairings, then plot ordered by mean R-squared
dfSmallR2Rain %>%
  pivot_longer(cols = c(var1, var2)) %>%
  group_by(value) %>%
  summarize(across(r2, .fns = list("min" = min, "mu" = mean, "max" = max))) %>%
  ggplot(aes(x = fct_reorder(value, r2_mu))) +
  coord_flip() +
  geom_point(aes(y = r2_mu)) +
  geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
  lims(y = c(NA, 1)) +
  geom_hline(yintercept = 1, linetype = 2, color = "red") +
  labs(title="R-squared in every 2-predictor model including self and one other",
       subtitle="Predicting rain",
       y="Range of R-squared (min-mean-max)",
       x=NULL
  )
# Top pairings after dropping any pair that includes precipitation itself
# (filtering before sorting; the result is identical)
dfSmallR2Rain %>%
  filter(var1 != "precipitation", var2 != "precipitation") %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 630 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 weathercode soil_moisture_0_to_7cm 0.897
## 2 weathercode soil_temperature_100_to_255cm 0.889
## 3 weathercode vapor_pressure_deficit 0.887
## 4 relativehumidity_2m weathercode 0.887
## 5 weathercode month 0.884
## 6 snowfall weathercode 0.882
## 7 weathercode year 0.881
## 8 weathercode doy 0.881
## 9 direct_normal_irradiance weathercode 0.879
## 10 diffuse_radiation weathercode 0.879
## 11 shortwave_radiation weathercode 0.879
## 12 cloudcover_high weathercode 0.878
## 13 direct_radiation weathercode 0.877
## 14 cloudcover_mid weathercode 0.877
## 15 weathercode tod 0.876
## 16 et0_fao_evapotranspiration weathercode 0.876
## 17 weathercode soil_temperature_7_to_28cm 0.875
## 18 cloudcover weathercode 0.873
## 19 cloudcover_low weathercode 0.873
## 20 surface_pressure weathercode 0.871
## # ℹ 610 more rows
# Per-variable R-squared range after excluding pairs that contain either of
# the two dominant predictors (precipitation, weathercode)
dfSmallR2Rain %>%
filter(var2!="precipitation", var1!="precipitation", var2!="weathercode", var1!="weathercode") %>%
pivot_longer(cols=c(var1, var2)) %>%
group_by(value) %>%
summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>%
ggplot(aes(x=fct_reorder(value, r2_mu))) +
coord_flip() +
geom_point(aes(y=r2_mu)) +
geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) +
lims(y=c(NA, 1)) +
geom_hline(yintercept=1, lty=2, color="red") +
# FIX: subtitle previously said "Predicting cloud cover" (copied from the
# cloud-cover section); this chart is for the rain models
labs(title="R-squared in every 2-predictor model including self and one other",
subtitle="Predicting rain (excluding variable paired with 'precipitation' or 'weathercode')",
y="Range of R-squared (min-mean-max)",
x=NULL
)
# Top pairings excluding both precipitation and weathercode
dfSmallR2Rain %>%
  filter(var1 != "precipitation", var2 != "precipitation",
         var1 != "weathercode", var2 != "weathercode") %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 595 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 soil_moisture_0_to_7cm soil_moisture_7_to_28cm 0.199
## 2 vapor_pressure_deficit soil_moisture_0_to_7cm 0.150
## 3 cloudcover_low soil_moisture_0_to_7cm 0.145
## 4 soil_moisture_0_to_7cm soil_moisture_100_to_255cm 0.135
## 5 relativehumidity_2m soil_moisture_0_to_7cm 0.131
## 6 soil_temperature_100_to_255cm soil_moisture_0_to_7cm 0.121
## 7 surface_pressure soil_moisture_0_to_7cm 0.117
## 8 pressure_msl soil_moisture_0_to_7cm 0.107
## 9 soil_moisture_0_to_7cm soil_moisture_28_to_100cm 0.0979
## 10 windgusts_10m soil_moisture_0_to_7cm 0.0840
## 11 soil_moisture_0_to_7cm doy 0.0816
## 12 windspeed_10m soil_moisture_0_to_7cm 0.0814
## 13 soil_temperature_28_to_100cm soil_moisture_0_to_7cm 0.0806
## 14 temperature_2m soil_moisture_0_to_7cm 0.0791
## 15 dewpoint_2m soil_moisture_0_to_7cm 0.0772
## 16 windspeed_100m soil_moisture_0_to_7cm 0.0765
## 17 cloudcover soil_moisture_0_to_7cm 0.0756
## 18 direct_radiation soil_moisture_0_to_7cm 0.0720
## 19 direct_normal_irradiance soil_moisture_0_to_7cm 0.0712
## 20 snowfall soil_moisture_0_to_7cm 0.0686
## # ℹ 575 more rows
Precipitation and weather code are highly predictive of rainfall, but most other predictors drive zero or even negative R-squared when applied in the test data set
Select combinations are explored using the full training dataset:
# Candidate predictors retained for the larger-sample rain models
possLargeRain <- c(
  "precipitation",
  "weathercode",
  "snowfall",
  "soil_moisture_0_to_7cm"
)
possLargeRain
## [1] "precipitation" "weathercode" "snowfall"
## [4] "soil_moisture_0_to_7cm"
# Fit a random forest on every unordered pair of possLargeRain predictors and
# record the 2022-holdout R-squared for predicting rain.
# NOTE(review): dfTrainCloud/dfTestCloud appear to be reused from the
# cloud-cover section -- presumably the same train/test split of the full
# dataset; confirm the naming is intentional.
# combn() emits pairs column-wise in the same order as the original nested
# loops (1-2, 1-3, 1-4, 2-3, 2-4, 3-4), so output messages appear unchanged.
pairsLargeRain <- utils::combn(seq_along(possLargeRain), 2)
resLargeRain <- vector("list", ncol(pairsLargeRain))
for (j in seq_len(ncol(pairsLargeRain))) {
  idx1 <- pairsLargeRain[1, j]
  idx2 <- pairsLargeRain[2, j]
  r2LargeRain <- runFullRF(dfTrain=dfTrainCloud,
                           yVar="rain",
                           xVars=possLargeRain[c(idx1, idx2)],
                           dfTest=dfTestCloud,
                           useLabel=keyLabel,
                           useSub=stringr::str_to_sentence(keyLabel),
                           isContVar=TRUE,
                           mtry=2,
                           makePlots=FALSE,
                           returnData=TRUE
                           )[["rfAcc"]][["r2"]]
  resLargeRain[[j]] <- c(idx1, idx2, r2LargeRain)
}
# Bind once at the end instead of growing the matrix with rbind() inside the
# loop (which copies the accumulator on every iteration)
mtxLargeRain <- do.call(rbind, resLargeRain)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.988% (RMSE 0.01 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.997% (RMSE 0 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.565% (RMSE 0.07 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.48% (RMSE 0.2 vs. 0.6 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.942% (RMSE 0.2 vs. 0.6 null)
## Growing trees.. Progress: 80%. Estimated remaining time: 7 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.938% (RMSE 0.53 vs. 0.6 null)
# Label the pairwise results for the larger rain models and rank them
dfLargeR2Rain <- mtxLargeRain %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1 = possLargeRain[idx1],
         var2 = possLargeRain[idx2],
         rn = row_number())
dfLargeR2Rain %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 6 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 precipitation snowfall 1.00
## 2 precipitation weathercode 1.00
## 3 precipitation soil_moisture_0_to_7cm 0.986
## 4 weathercode soil_moisture_0_to_7cm 0.889
## 5 weathercode snowfall 0.885
## 6 snowfall soil_moisture_0_to_7cm 0.219
In contrast to previous models, R-squared for predicting rain is significantly improved by access to a much larger training dataset
A model is run to predict snowfall, at first allowing precipitation and rain as predictors:
# Human-readable label echoed in runFullRF's console messages and plot titles
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest predicting snowfall from every training variable except
# snowfall itself; trained on pre-2022 rows, evaluated on the 2022 holdout
rfSnowFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="snowfall",
xVars=c(varsTrain[!str_detect(varsTrain, "^snowfall$")]), # drop the response from predictors
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE, # continuous response
rndTo=-1L,
refXY=TRUE,
returnData=TRUE
)
## Growing trees.. Progress: 41%. Estimated remaining time: 43 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 15 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.034% (RMSE 0.01 vs. 0.04 null)
## `geom_smooth()` using formula = 'y ~ x'
The model is reasonably effective at predicting snowfall, primarily by leveraging highly associated predictors precipitation and weather code. The model generally under-predicts high snowfall observations
A similar process is run using the linear model:
# Eliminate diffuse radiation due to rank-deficiency
# Linear model: snowfall regressed on all remaining training variables,
# with weathercode treated as a categorical factor, fit on pre-2022 rows
lmSnowFull <- lm(snowfall ~ .,
data=allCity %>%
filter(tt=="train", year<2022) %>%
mutate(weathercode=factor(weathercode)) %>% # WMO code is categorical, not numeric
select(all_of(varsTrain)) %>%
select(-diffuse_radiation)
)
summary(lmSnowFull)
##
## Call:
## lm(formula = snowfall ~ ., data = allCity %>% filter(tt == "train",
## year < 2022) %>% mutate(weathercode = factor(weathercode)) %>%
## select(all_of(varsTrain)) %>% select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.020446 -0.000074 -0.000007 0.000065 0.075432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.268e-02 2.648e-03 12.341 < 2e-16 ***
## hour -3.658e-07 6.946e-07 -0.527 0.598452
## temperature_2m -3.363e-05 6.624e-06 -5.077 3.84e-07 ***
## relativehumidity_2m -4.568e-06 6.629e-07 -6.891 5.54e-12 ***
## dewpoint_2m 3.111e-06 2.204e-06 1.411 0.158225
## apparent_temperature 1.942e-05 5.567e-06 3.489 0.000486 ***
## pressure_msl -1.782e-06 9.866e-07 -1.807 0.070819 .
## surface_pressure -1.605e-06 4.395e-07 -3.652 0.000261 ***
## precipitation 6.845e-01 1.434e-04 4774.330 < 2e-16 ***
## rain -6.848e-01 1.456e-04 -4701.844 < 2e-16 ***
## cloudcover 8.165e-06 7.226e-07 11.298 < 2e-16 ***
## cloudcover_low -5.583e-06 3.729e-07 -14.972 < 2e-16 ***
## cloudcover_mid -4.398e-06 2.786e-07 -15.789 < 2e-16 ***
## cloudcover_high -2.148e-06 1.763e-07 -12.183 < 2e-16 ***
## shortwave_radiation 3.950e-07 1.028e-07 3.841 0.000123 ***
## direct_radiation -3.277e-07 1.099e-07 -2.983 0.002857 **
## direct_normal_irradiance -2.862e-08 4.077e-08 -0.702 0.482691
## windspeed_10m -1.466e-05 2.655e-06 -5.520 3.39e-08 ***
## windspeed_100m 5.995e-06 1.668e-06 3.593 0.000327 ***
## winddirection_10m -1.023e-07 6.741e-08 -1.517 0.129156
## winddirection_100m -8.782e-08 6.798e-08 -1.292 0.196407
## windgusts_10m 4.359e-06 8.984e-07 4.852 1.22e-06 ***
## et0_fao_evapotranspiration -2.485e-04 1.191e-04 -2.087 0.036930 *
## weathercode1 -3.130e-05 1.771e-05 -1.768 0.077143 .
## weathercode2 -8.146e-05 3.183e-05 -2.559 0.010504 *
## weathercode3 -3.591e-05 4.193e-05 -0.856 0.391780
## weathercode51 8.037e-05 3.765e-05 2.135 0.032762 *
## weathercode53 2.770e-04 4.986e-05 5.557 2.75e-08 ***
## weathercode55 4.158e-04 7.061e-05 5.889 3.90e-09 ***
## weathercode61 6.275e-04 6.800e-05 9.228 < 2e-16 ***
## weathercode63 1.233e-03 1.118e-04 11.027 < 2e-16 ***
## weathercode65 3.267e-03 3.062e-04 10.668 < 2e-16 ***
## weathercode71 2.160e-02 6.684e-05 323.091 < 2e-16 ***
## weathercode73 8.795e-03 1.088e-04 80.865 < 2e-16 ***
## weathercode75 2.948e-02 3.019e-04 97.647 < 2e-16 ***
## vapor_pressure_deficit 2.460e-05 1.296e-05 1.899 0.057580 .
## soil_temperature_0_to_7cm 1.165e-06 1.971e-06 0.591 0.554292
## soil_temperature_7_to_28cm 3.702e-06 3.319e-06 1.115 0.264681
## soil_temperature_28_to_100cm -2.414e-06 3.667e-06 -0.658 0.510354
## soil_temperature_100_to_255cm 7.237e-07 1.890e-06 0.383 0.701766
## soil_moisture_0_to_7cm 2.170e-04 1.137e-04 1.909 0.056317 .
## soil_moisture_7_to_28cm -7.146e-05 1.618e-04 -0.442 0.658664
## soil_moisture_28_to_100cm -1.393e-05 1.218e-04 -0.114 0.908943
## soil_moisture_100_to_255cm 6.144e-04 1.197e-04 5.131 2.89e-07 ***
## year -1.441e-05 1.277e-06 -11.287 < 2e-16 ***
## doy -1.886e-08 5.095e-08 -0.370 0.711314
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002491 on 368064 degrees of freedom
## Multiple R-squared: 0.9978, Adjusted R-squared: 0.9978
## F-statistic: 3.674e+06 on 45 and 368064 DF, p-value: < 2.2e-16
# Holdout (2022 test) performance of the full linear snowfall model
allCity %>%
  filter(tt == "test", year == 2022) %>%
  mutate(weathercode = factor(weathercode)) %>%
  # predict() must see weathercode already factored, so this mutate stays
  # separate and downstream of the one above
  mutate(pred = predict(lmSnowFull, newdata = .)) %>%
  summarise(meModel = mean((pred - snowfall)^2),           # model MSE
            meBase = mean((snowfall - mean(snowfall))^2),  # null-model MSE
            r2 = 1 - meModel/meBase,
            rmse = sqrt(meModel)
  )
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 0.00000244 0.00193 0.999 0.00156
# Rank the full model's coefficients by absolute t-statistic;
# as_tibble(rownames=) lifts the coefficient names into a column directly
summary(lmSnowFull)$coefficients %>%
  tibble::as_tibble(rownames = "Variable") %>%
  arrange(desc(abs(`t value`)))
## # A tibble: 46 × 5
## Variable Estimate `Std. Error` `t value` `Pr(>|t|)`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 precipitation 0.685 0.000143 4774. 0
## 2 rain -0.685 0.000146 -4702. 0
## 3 weathercode71 0.0216 0.0000668 323. 0
## 4 weathercode75 0.0295 0.000302 97.6 0
## 5 weathercode73 0.00879 0.000109 80.9 0
## 6 cloudcover_mid -0.00000440 0.000000279 -15.8 3.84e-56
## 7 cloudcover_low -0.00000558 0.000000373 -15.0 1.16e-50
## 8 (Intercept) 0.0327 0.00265 12.3 5.54e-35
## 9 cloudcover_high -0.00000215 0.000000176 -12.2 3.90e-34
## 10 cloudcover 0.00000816 0.000000723 11.3 1.36e-29
## # ℹ 36 more rows
Even with many confounders, the linear model largely identifies that precipitation and rain predict snowfall. As well, the linear model identifies weather codes 71, 73, and 75 which each mean that snow is falling
The linear model has very strong explanatory and predictive power. Snowfall (cm) appears defined in the raw data as 0.7 * (precipitation (mm) minus rain (mm)):
# Best predictors only
# Refit using just the two dominant predictors from the full model;
# snowfall is (nearly) a linear function of precipitation minus rain
lmSnowTwo <- lm(snowfall ~ precipitation + rain,
data=allCity %>%
filter(tt=="train", year<2022) %>%
select(all_of(varsTrain)) %>%
select(-diffuse_radiation) # kept for parity with the full-model pipeline
)
summary(lmSnowTwo)
##
## Call:
## lm(formula = snowfall ~ precipitation + rain, data = allCity %>%
## filter(tt == "train", year < 2022) %>% select(all_of(varsTrain)) %>%
## select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.000220 -0.000131 -0.000131 -0.000131 0.069869
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.313e-04 5.003e-06 26.25 <2e-16 ***
## precipitation 7.000e-01 6.535e-05 10712.68 <2e-16 ***
## rain -7.001e-01 6.603e-05 -10602.88 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002988 on 368107 degrees of freedom
## Multiple R-squared: 0.9968, Adjusted R-squared: 0.9968
## F-statistic: 5.738e+07 on 2 and 368107 DF, p-value: < 2.2e-16
# Holdout evaluation for the two-predictor model on the 2022 test rows:
# model MSE, baseline MSE, R^2, and RMSE
allCity %>%
  filter(tt == "test", year == 2022) %>%
  mutate(pred = predict(lmSnowTwo, newdata = .)) %>%
  summarize(
    meModel = mean((pred - snowfall)^2),
    meBase = mean((snowfall - mean(snowfall))^2),
    r2 = 1 - meModel / meBase,
    rmse = sqrt(meModel)
  )
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 0.000000389 0.00193 1.00 0.000624
As well, since ‘weathercode’ indicates whether it is snowing, the combination with precipitation has reasonable predictive power on snowfall:
# Precipitation and weather code as factor: model snowfall via a
# precipitation-by-weathercode interaction (a separate precipitation slope
# for each weather code).
# NOTE(review): the data expression is left byte-identical because summary()
# deparses and prints this captured call in the rendered output. Four
# interaction terms come back NA ("not defined because of singularities"),
# which later triggers the rank-deficient-fit warning from predict().
lmSnowWCP <- lm(snowfall ~ precipitation:weathercode,
data=allCity %>%
mutate(weathercode=factor(weathercode)) %>%
filter(tt=="train", year<2022) %>%
select(all_of(varsTrain)) %>%
select(-diffuse_radiation)
)
summary(lmSnowWCP)
##
## Call:
## lm(formula = snowfall ~ precipitation:weathercode, data = allCity %>%
## mutate(weathercode = factor(weathercode)) %>% filter(tt ==
## "train", year < 2022) %>% select(all_of(varsTrain)) %>% select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.26512 -0.00083 -0.00083 -0.00083 0.32684
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.258e-04 2.696e-05 30.631 < 2e-16 ***
## precipitation:weathercode0 NA NA NA NA
## precipitation:weathercode1 NA NA NA NA
## precipitation:weathercode2 NA NA NA NA
## precipitation:weathercode3 NA NA NA NA
## precipitation:weathercode51 -3.225e-03 5.057e-04 -6.378 1.8e-10 ***
## precipitation:weathercode53 -1.194e-03 2.856e-04 -4.181 2.9e-05 ***
## precipitation:weathercode55 -7.506e-04 3.110e-04 -2.413 0.015802 *
## precipitation:weathercode61 -4.578e-04 1.348e-04 -3.396 0.000683 ***
## precipitation:weathercode63 -1.939e-04 7.118e-05 -2.724 0.006442 **
## precipitation:weathercode65 -6.735e-05 7.805e-05 -0.863 0.388240
## precipitation:weathercode71 1.356e-01 8.931e-04 151.859 < 2e-16 ***
## precipitation:weathercode73 4.668e-01 5.246e-04 889.991 < 2e-16 ***
## precipitation:weathercode75 6.348e-01 3.643e-04 1742.748 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01562 on 368100 degrees of freedom
## Multiple R-squared: 0.9127, Adjusted R-squared: 0.9127
## F-statistic: 4.275e+05 on 9 and 368100 DF, p-value: < 2.2e-16
# Holdout evaluation for the precipitation:weathercode interaction model on
# the 2022 test rows. The factor conversion must precede the predict() stage
# so `.` carries the factor column.
allCity %>%
  filter(tt == "test", year == 2022) %>%
  mutate(weathercode = factor(weathercode)) %>%
  mutate(pred = predict(lmSnowWCP, newdata = .)) %>%
  summarize(
    meModel = mean((pred - snowfall)^2),
    meBase = mean((snowfall - mean(snowfall))^2),
    r2 = 1 - meModel / meBase,
    rmse = sqrt(meModel)
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `pred = predict(lmSnowWCP, newdata = .)`.
## Caused by warning in `predict.lm()`:
## ! prediction from a rank-deficient fit may be misleading
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 0.000119 0.00193 0.938 0.0109
# Actual vs. predicted snowfall: round all numeric columns to 0.01, collapse
# to (pred, actual) counts, and size points by how common each pair is
allCity %>%
  filter(tt == "test", year == 2022) %>%
  mutate(weathercode = factor(weathercode)) %>%
  mutate(pred = predict(lmSnowWCP, newdata = .)) %>%
  mutate(across(.cols = where(is.numeric), .fns = \(x) autoRound(x, rndTo = 0.01))) %>%
  count(snowfall, pred) %>%
  ggplot(aes(x = pred, y = snowfall)) +
  geom_point(aes(size = n)) +
  labs(title = "Actual vs. Predicted Snowfall\n(linear model with precipitation and weather code)") +
  geom_smooth(aes(weight = n), method = "lm") +
  geom_abline(slope = 1, intercept = 0, lty = 2, color = "red")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `pred = predict(lmSnowWCP, newdata = .)`.
## Caused by warning in `predict.lm()`:
## ! prediction from a rank-deficient fit may be misleading
## `geom_smooth()` using formula = 'y ~ x'
The relationship between weathercode and precipitation/snow is explored:
# Mean precipitation and snowfall for each weathercode, faceted side by side
# with free y-scales (the two measures differ in magnitude)
allCity %>%
  mutate(weathercode = factor(weathercode)) %>%
  group_by(weathercode) %>%
  summarize(across(.cols = c("precipitation", "snowfall"), .fns = mean)) %>%
  pivot_longer(cols = -weathercode) %>%
  ggplot(aes(x = weathercode, y = value)) +
  geom_col(fill = "lightblue") +
  geom_text(aes(y = value / 2, label = round(value, 1)), size = 2.5) +
  facet_wrap(~name, scales = "free_y") +
  labs(title = "Average precipitation (mm) and snow (cm) by weathercode",
       x = NULL,
       y = "Precip (mm) or Snowfall (cm)"
  )
# Share of precipitation that fell as snow, using the 0.7 cm-per-mm ratio
# implied by the linear model; restricted to rows with any precipitation
allCity %>%
  filter(precipitation > 0) %>%
  mutate(weathercode = factor(weathercode)) %>%
  mutate(pctSnow = snowfall / (0.7 * precipitation)) %>%
  ggplot(aes(x = weathercode, y = pctSnow)) +
  geom_boxplot(fill = "lightblue") +
  labs(title = "Percent of precipitation as snowfall by weathercode",
       x = NULL,
       y = "Snowfall (cm) divided by\n(0.7 * precipitation (mm))"
  )
While rain is sometimes falling during snow events, in general the precipitation falls mainly or entirely as snow during weathercodes 71, 73, and 75. There is no snowfall under other weathercodes.
A model is run to predict weathercode, at first allowing precipitation, rain, and snowfall as predictors:
# Random forest predicting weathercode from every other training variable;
# keyLabel is interpolated into the reported accuracy message and plot labels
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfWCFull <- runFullRF(
  dfTrain = allCity %>%
    filter(tt == "train", year < 2022) %>%
    mutate(weathercode = factor(weathercode)),
  yVar = "weathercode",
  # all training variables except the response itself
  xVars = varsTrain[varsTrain != "weathercode"],
  dfTest = allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)),
  useLabel = keyLabel,
  useSub = stringr::str_to_sentence(keyLabel),
  isContVar = FALSE,
  rndTo = -1L,
  refXY = TRUE,
  returnData = TRUE
)
## Growing trees.. Progress: 34%. Estimated remaining time: 1 minute, 0 seconds.
## Growing trees.. Progress: 67%. Estimated remaining time: 29 seconds.
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.634%
Accuracy is extremely high, with weathercode being strongly linked to cloud cover, precipitation rate, and the percentage of precipitation falling as snow.
Relationships between precipitation, rain, snow, clouds, and weathercode are further explored:
# Precipitation type by weather code: cross-tabulate snow/rain presence
# against weathercode and show cell counts as a shaded tile grid.
# The mu* per-row means are computed but not mapped in this plot.
allCity %>%
  mutate(isSnow = snowfall > 0, isRain = rain > 0) %>%
  group_by(isSnow, isRain, weathercode) %>%
  summarize(
    n = n(),
    across(.cols = c("rain", "snowfall", "precipitation", "cloudcover"), .fns = sum),
    .groups = "drop"
  ) %>%
  mutate(
    pType = case_when(
      isSnow & isRain ~ "SNRA",
      isSnow & !isRain ~ "SN",
      !isSnow & isRain ~ "RA",
      TRUE ~ "None"
    ),
    musn = snowfall / n,
    mura = rain / n,
    mucc = cloudcover / n
  ) %>%
  ggplot(aes(y = pType, x = factor(weathercode))) +
  geom_tile(aes(fill = n)) +
  scale_fill_continuous(low = "white", high = "lightgreen") +
  geom_text(aes(label = n), size = 2.5) +
  labs(title = "Precipitation types by weather code", x = "Weather code", y = "Precipitation type")
# Cloud cover by weather code: distribution of cloud cover within each code
allCity %>%
  ggplot(aes(x = factor(weathercode), y = cloudcover)) +
  geom_boxplot(fill = "lightblue") +
  labs(title = "Cloud cover by weather code", x = "Weather code", y = "Cloud cover (%)")
# Rain by weather code: distribution of rainfall within each code
allCity %>%
  ggplot(aes(x = factor(weathercode), y = rain)) +
  geom_boxplot(fill = "lightblue") +
  labs(title = "Rain by weather code", x = "Weather code", y = "Rain (mm)")
# Snow by weather code: distribution of snowfall within each code
allCity %>%
  ggplot(aes(x = factor(weathercode), y = snowfall)) +
  geom_boxplot(fill = "lightblue") +
  labs(title = "Snow by weather code", x = "Weather code", y = "Snow (cm)")
# Table of results: for each (snow?, rain?, weathercode) group, report row
# count, cloud-cover range, and per-row means of the precipitation variables
allCity %>%
  mutate(isSnow = snowfall > 0, isRain = rain > 0) %>%
  group_by(isSnow, isRain, weathercode) %>%
  summarize(
    n = n(),
    maxcc = max(cloudcover),
    mincc = min(cloudcover),
    across(.cols = c("rain", "snowfall", "precipitation", "cloudcover"), .fns = sum),
    .groups = "drop"
  ) %>%
  mutate(
    pType = case_when(
      isSnow & isRain ~ "SNRA",
      isSnow & !isRain ~ "SN",
      !isSnow & isRain ~ "RA",
      TRUE ~ "None"
    ),
    # convert group sums to per-row means
    muprecip = precipitation / n,
    musn = snowfall / n,
    mura = rain / n,
    mucc = cloudcover / n
  ) %>%
  select(-rain, -snowfall, -precipitation, -cloudcover) %>%
  select(weathercode, pType, isSnow, isRain, n, mincc, mucc, maxcc, everything())
## # A tibble: 16 × 11
## weathercode pType isSnow isRain n mincc mucc maxcc muprecip musn
## <int> <chr> <lgl> <lgl> <int> <int> <dbl> <int> <dbl> <dbl>
## 1 0 None FALSE FALSE 296257 0 3.61 20 0 0
## 2 1 None FALSE FALSE 126163 20 31.8 50 0 0
## 3 2 None FALSE FALSE 48304 50 63.7 80 0 0
## 4 3 None FALSE FALSE 72391 80 94.2 100 0 0
## 5 51 RA FALSE TRUE 32254 0 77.1 100 0.200 0
## 6 53 RA FALSE TRUE 10815 0 86.2 100 0.663 0
## 7 55 RA FALSE TRUE 3461 1 87.7 100 1.09 0
## 8 61 RA FALSE TRUE 7011 1 89.9 100 1.74 0
## 9 63 RA FALSE TRUE 4947 4 91.0 100 3.91 0
## 10 65 RA FALSE TRUE 551 20 93.0 100 11.0 0
## 11 71 SN TRUE FALSE 3192 0 93.4 100 0.0997 0.0906
## 12 73 SN TRUE FALSE 1800 0 97.2 100 0.525 0.368
## 13 75 SN TRUE FALSE 378 61 98.7 100 1.80 1.26
## 14 71 SNRA TRUE TRUE 521 21 95.0 100 0.610 0.0936
## 15 73 SNRA TRUE TRUE 553 29 96.9 100 1.03 0.396
## 16 75 SNRA TRUE TRUE 186 64 99.2 100 2.47 1.40
## # ℹ 1 more variable: mura <dbl>
Weather codes appear to be defined as:
Not surprisingly, the random forest is effective at pulling apart very clean data splits like these
A model is run to predict weathercode, using only cloud cover, precipitation, rain, and snowfall as predictors, and with random forest defaults (mtry=2 for 4 predictors):
# Random forest on only the four precipitation/cloud variables, with the
# ranger default mtry (2 of 4 predictors sampled per split)
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
runFullRF(
  dfTrain = allCity %>%
    filter(tt == "train", year < 2022) %>%
    mutate(weathercode = factor(weathercode)),
  yVar = "weathercode",
  xVars = c("cloudcover", "precipitation", "rain", "snowfall"),
  dfTest = allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)),
  useLabel = keyLabel,
  useSub = stringr::str_to_sentence(keyLabel),
  isContVar = FALSE,
  rndTo = -1L,
  refXY = TRUE,
  returnData = FALSE
)
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.436%
Accuracy remains over 99%, with main errors being cloudiness classification when there is zero precipitation
The model is updated using mtry=4 and mtry=1:
# Same four-variable random forest, forcing mtry=1 (one candidate predictor
# per split) to probe sensitivity to mtry
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
runFullRF(
  dfTrain = allCity %>%
    filter(tt == "train", year < 2022) %>%
    mutate(weathercode = factor(weathercode)),
  yVar = "weathercode",
  xVars = c("cloudcover", "precipitation", "rain", "snowfall"),
  dfTest = allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)),
  useLabel = keyLabel,
  useSub = stringr::str_to_sentence(keyLabel),
  isContVar = FALSE,
  rndTo = -1L,
  mtry = 1,
  refXY = TRUE,
  returnData = FALSE
)
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.267%
# Same four-variable random forest, forcing mtry=4 (all predictors considered
# at every split) for comparison against mtry=1 and the default
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
runFullRF(
  dfTrain = allCity %>%
    filter(tt == "train", year < 2022) %>%
    mutate(weathercode = factor(weathercode)),
  yVar = "weathercode",
  xVars = c("cloudcover", "precipitation", "rain", "snowfall"),
  dfTest = allCity %>%
    filter(tt == "test", year == 2022) %>%
    mutate(weathercode = factor(weathercode)),
  useLabel = keyLabel,
  useSub = stringr::str_to_sentence(keyLabel),
  isContVar = FALSE,
  rndTo = -1L,
  mtry = 4,
  refXY = TRUE,
  returnData = FALSE
)
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.429%
The model performs with much lower accuracy for mtry=1 rather than mtry=2, but accuracy is essentially identical (over 99%) for mtry=2 and mtry=4. The main issue with mtry=1 is inability to classify non-precipitation days since snowfall, rain, and precipitation as stand-alones do not distinguish degree of cloudiness
All combinations of two variables are explored for predicting weathercode on a smaller training dataset:
# Train and test data created previously (dfTrainCloud and dfTestCloud)
# Variables to explore: all training predictors except weathercode itself,
# plus the derived time features month and tod
possWCVars <- c(varsTrain[!str_detect(varsTrain, "weathercode")], "month", "tod")
# Subsets to use: fixed seed so the 5,000-row training subset is reproducible
set.seed(24083015)
idxSmallWC <- sample(1:nrow(dfTrainCloud), 5000, replace=FALSE)
# Preallocate one row per unordered variable pair instead of growing the
# matrix with rbind() inside the loop (which copies the whole matrix each
# iteration, O(n^2) total work)
nWCPairs <- choose(length(possWCVars), 2L)
mtxSmallWC <- matrix(NA_real_, nrow=nWCPairs, ncol=3)
pairRow <- 0L
for(idx1 in seq_len(length(possWCVars)-1L)) {
  for(idx2 in (idx1+1L):length(possWCVars)) {
    # Holdout accuracy of a two-variable random forest fit on the small subset
    r2SmallWC <- runFullRF(dfTrain=dfTrainCloud[idxSmallWC,] %>% mutate(weathercode=factor(weathercode)),
                           yVar="weathercode",
                           xVars=possWCVars[c(idx1, idx2)],
                           dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                           useLabel=keyLabel,
                           useSub=stringr::str_to_sentence(keyLabel),
                           isContVar=FALSE,
                           mtry=2,
                           makePlots=FALSE,
                           returnData=TRUE
    )[["rfAcc"]]
    pairRow <- pairRow + 1L
    # Row: indices of the two predictors tried, plus the resulting accuracy
    mtxSmallWC[pairRow, ] <- c(idx1, idx2, r2SmallWC)
  }
}
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.568%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.022%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.878%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.515%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.758%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.325%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.652%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.048%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.456%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.097%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.721%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.917%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.047%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.304%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.176%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.146%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.328%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.553%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.088%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.271%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.479%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.352%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.936%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.382%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.4%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.033%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.375%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.106%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.95%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.563%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.512%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.226%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.905%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.753%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.473%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.451%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.448%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.996%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.081%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.944%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.572%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.483%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.813%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.351%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.138%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.958%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.645%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.661%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.595%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.065%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.784%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.832%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.975%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.464%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.051%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.358%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.586%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.084%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.867%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.023%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.178%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.369%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.767%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.89%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.732%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.397%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.041%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.103%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.708%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.16%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.197%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.181%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.471%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.77%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.732%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.139%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.734%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.959%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.907%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.761%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.339%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.913%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.1%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.161%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.897%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.542%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.349%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.45%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.871%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.551%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.57%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.639%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.042%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.509%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.532%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.313%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.107%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.197%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.677%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.397%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.963%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.786%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.242%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.954%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.037%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.737%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.401%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.341%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.095%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.851%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.465%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.833%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.663%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.723%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.221%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.296%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.172%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.449%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.771%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.447%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.729%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.79%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.333%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.15%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.995%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.354%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.08%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.651%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.798%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.214%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.103%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.379%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.232%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.749%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.494%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.567%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.318%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.987%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.318%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.458%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.006%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.366%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.462%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.993%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.579%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.091%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.884%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.209%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.663%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.428%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.941%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.657%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.975%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.792%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.848%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.154%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.284%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.532%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.426%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.574%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.049%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.211%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.999%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.313%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.382%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.599%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.674%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.097%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.048%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.542%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.047%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.171%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.994%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.503%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.78%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.202%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.906%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.671%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.09%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.004%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.495%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.045%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.36%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.602%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.337%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.208%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.621%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.244%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.864%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.685%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.114%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.236%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.43%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.222%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.884%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.306%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.635%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.49%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.512%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.365%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.824%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.408%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.599%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.451%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.764%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.965%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.872%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.68%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.214%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.212%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.383%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.012%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.008%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.222%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.125%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.049%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.133%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.216%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.57%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.915%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.047%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.572%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.555%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.387%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.123%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.411%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.452%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.307%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.42%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.975%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.955%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.213%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.229%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.187%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.917%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.263%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.242%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.879%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.998%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.285%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.17%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.72%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.892%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.638%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.678%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.319%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.966%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.893%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.905%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.78%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.679%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.291%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.09%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.371%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.452%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.884%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.858%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.95%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.071%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.132%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.213%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.301%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.199%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.85%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.166%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.358%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.166%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.308%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.695%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.344%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.303%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.156%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.6%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.081%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.477%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.075%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.151%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.585%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.71%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.809%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.691%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.767%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.092%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.409%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.238%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.912%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.216%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.238%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.573%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.318%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.908%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.567%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.731%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.995%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.008%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.487%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.288%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.995%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.1%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.399%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.421%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.512%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.567%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.254%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.442%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.638%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.775%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.742%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.252%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.573%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.054%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.456%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.48%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.456%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.456%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.417%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.762%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.333%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.311%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.76%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.851%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.272%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.65%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.391%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.566%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.741%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.427%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.432%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.709%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.993%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.642%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.909%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.521%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.442%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.879%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.741%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.741%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.642%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.589%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.402%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.59%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.515%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.04%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.095%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.451%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.017%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.789%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.514%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.922%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.539%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.181%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.58%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.165%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.032%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.173%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.63%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.323%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.192%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.712%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.859%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.597%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.278%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.469%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.204%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.391%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.589%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.701%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.67%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.926%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.965%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.265%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.546%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.609%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.058%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.426%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.079%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.427%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.747%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.596%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.799%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.508%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.363%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.462%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.929%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.142%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.849%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.214%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.38%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.534%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.575%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.935%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.405%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.809%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.593%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.978%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.658%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.534%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.625%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.882%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.748%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.06%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.478%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.694%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.067%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.524%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.767%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.958%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.122%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.553%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.661%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.313%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.837%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.686%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.919%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.14%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.485%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.737%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.59%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.008%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.749%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.76%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.459%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.592%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.991%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.549%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.012%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.84%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.425%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.699%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.212%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.844%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.271%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.427%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.718%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.215%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.546%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.774%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.773%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.892%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.638%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.524%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.664%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.409%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.805%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.446%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.951%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.979%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.647%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.453%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.339%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.593%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.217%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.36%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.179%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.693%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.272%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.132%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.774%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.756%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.855%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.228%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.353%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.156%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.884%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.975%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.035%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.132%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.503%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.386%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.495%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.266%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.479%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.858%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.35%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.214%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.803%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.175%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.798%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.375%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.703%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.396%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.245%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.659%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.601%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.411%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.774%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.889%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.327%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.847%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.2%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.131%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.693%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.649%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.924%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.979%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.177%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.436%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.895%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.019%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.944%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.557%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.71%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.899%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.787%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.236%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.963%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.333%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.468%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.663%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.244%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.939%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.421%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.946%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.733%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.947%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.568%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.855%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.214%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.145%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.64%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.145%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.472%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.252%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.896%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.272%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.297%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.003%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.175%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.08%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.692%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.52%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.566%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.649%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.663%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.556%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.02%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.843%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.462%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.29%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.691%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.66%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.404%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.812%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.908%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.444%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.002%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.269%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.733%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.564%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.236%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.266%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.305%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.183%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.557%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.036%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.516%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.748%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.235%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.076%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.355%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.705%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.041%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.42%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.305%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.419%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.015%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.358%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.551%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.211%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.746%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.261%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.683%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.474%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.254%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.835%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.192%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.924%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.244%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.249%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.049%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.468%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.391%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.718%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.891%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.139%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.764%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.312%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.981%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.905%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.465%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.481%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.58%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.128%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.258%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.384%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.323%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.024%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.915%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.166%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.681%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.34%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.034%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.538%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.12%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.681%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.161%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.216%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.119%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.921%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.221%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.06%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.546%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.617%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.382%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.465%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.412%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.864%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.766%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.409%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.708%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.926%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.838%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.405%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.767%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.298%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.763%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.764%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.881%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.237%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.931%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.938%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.582%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.57%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.8%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.658%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.133%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.622%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.056%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.094%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.094%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.069%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.236%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.483%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.612%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.68%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.034%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.917%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.579%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.418%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.696%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.885%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.491%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.057%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.462%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.023%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.136%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.37%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.988%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.587%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.337%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.085%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.928%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.451%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.262%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.58%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.451%
Accuracy by pairs of metrics is explored:
# Assemble the pairwise small-sample accuracy results into a tibble,
# attaching the human-readable variable name for each index in the pair.
dfSmallR2WC <- mtxSmallWC %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1=possWCVars[idx1],
         var2=possWCVars[idx2],
         rn=row_number()
  )
# Rank variable pairs by holdout accuracy (top 20 shown)
dfSmallR2WC %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n=20)
## # A tibble: 666 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 rain cloudcover 0.983
## 2 precipitation cloudcover 0.982
## 3 snowfall cloudcover 0.896
## 4 cloudcover tod 0.886
## 5 cloudcover direct_normal_irradiance 0.879
## 6 cloudcover direct_radiation 0.878
## 7 cloudcover et0_fao_evapotranspiration 0.874
## 8 cloudcover cloudcover_low 0.874
## 9 cloudcover month 0.874
## 10 cloudcover cloudcover_high 0.873
## 11 cloudcover shortwave_radiation 0.873
## 12 cloudcover diffuse_radiation 0.873
## 13 hour cloudcover 0.871
## 14 relativehumidity_2m cloudcover 0.869
## 15 cloudcover cloudcover_mid 0.868
## 16 cloudcover vapor_pressure_deficit 0.867
## 17 cloudcover soil_moisture_0_to_7cm 0.864
## 18 cloudcover windgusts_10m 0.864
## 19 temperature_2m cloudcover 0.861
## 20 apparent_temperature cloudcover 0.860
## # ℹ 646 more rows
# Per-variable accuracy profile: for each variable, the min/mean/max holdout
# accuracy across all two-predictor models in which it appears.
dfSmallR2WC %>%
  pivot_longer(cols=c(var1, var2)) %>%
  group_by(value) %>%
  summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>%
  ggplot(aes(x=fct_reorder(value, r2_mu))) +
  geom_point(aes(y=r2_mu)) +
  geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) +
  geom_hline(yintercept=1, lty=2, color="red") +
  coord_flip() +
  lims(y=c(NA, 1)) +
  labs(title="Accuracy in every 2-predictor model including self and one other",
       subtitle="Predicting weathercode",
       y="Range of accuracy (min-mean-max)",
       x=NULL
  )
# Strongest pairings once any cloudcover-based variable is excluded
dfSmallR2WC %>%
  filter(!str_detect(var1, "cloudcover"), !str_detect(var2, "cloudcover")) %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n=20)
## # A tibble: 528 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 precipitation snowfall 0.622
## 2 precipitation rain 0.622
## 3 rain snowfall 0.622
## 4 relativehumidity_2m rain 0.617
## 5 rain diffuse_radiation 0.617
## 6 rain year 0.612
## 7 rain tod 0.612
## 8 rain month 0.612
## 9 precipitation diffuse_radiation 0.612
## 10 relativehumidity_2m precipitation 0.611
## 11 precipitation tod 0.611
## 12 precipitation month 0.611
## 13 hour rain 0.610
## 14 precipitation year 0.609
## 15 hour precipitation 0.607
## 16 rain et0_fao_evapotranspiration 0.605
## 17 rain windgusts_10m 0.601
## 18 precipitation et0_fao_evapotranspiration 0.600
## 19 precipitation windgusts_10m 0.593
## 20 rain vapor_pressure_deficit 0.591
## # ℹ 508 more rows
# Same per-variable accuracy profile, this time dropping precipitation and
# every cloudcover variable (the pairings that dominate the rankings above).
dfSmallR2WC %>%
  filter(var1!="precipitation",
         var2!="precipitation",
         !str_detect(var1, "cloudcover"),
         !str_detect(var2, "cloudcover")
  ) %>%
  pivot_longer(cols=c(var1, var2)) %>%
  group_by(value) %>%
  summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>%
  ggplot(aes(x=fct_reorder(value, r2_mu))) +
  geom_point(aes(y=r2_mu)) +
  geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) +
  geom_hline(yintercept=1, lty=2, color="red") +
  coord_flip() +
  lims(y=c(NA, 1)) +
  labs(title="Accuracy in every 2-predictor model including self and one other",
       subtitle="Predicting weathercode (excluding variable paired with 'precipitation' or 'cloudcover')",
       y="Range of accuracy (min-mean-max)",
       x=NULL
  )
# Strongest pairings with precipitation-related AND cloudcover variables all
# excluded; the single alternation regex is equivalent to separate filters.
dfSmallR2WC %>%
  filter(!str_detect(var1, "rain|snow|precip|cloudcover"),
         !str_detect(var2, "rain|snow|precip|cloudcover")
  ) %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n=20)
## # A tibble: 435 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 shortwave_radiation direct_normal_irradiance 0.541
## 2 direct_normal_irradiance diffuse_radiation 0.541
## 3 shortwave_radiation direct_radiation 0.539
## 4 direct_radiation diffuse_radiation 0.538
## 5 direct_radiation direct_normal_irradiance 0.535
## 6 shortwave_radiation diffuse_radiation 0.535
## 7 relativehumidity_2m tod 0.520
## 8 relativehumidity_2m diffuse_radiation 0.515
## 9 hour tod 0.515
## 10 year tod 0.515
## 11 month tod 0.515
## 12 diffuse_radiation et0_fao_evapotranspiration 0.512
## 13 hour direct_normal_irradiance 0.511
## 14 diffuse_radiation tod 0.510
## 15 diffuse_radiation vapor_pressure_deficit 0.507
## 16 direct_normal_irradiance et0_fao_evapotranspiration 0.502
## 17 et0_fao_evapotranspiration tod 0.500
## 18 year month 0.499
## 19 hour year 0.499
## 20 relativehumidity_2m direct_normal_irradiance 0.499
## # ℹ 415 more rows
# Null-model baseline: always predict the most frequent weathercode.
# The pct of the top row is the accuracy that baseline would earn.
allCity %>%
  count(weathercode, sort=TRUE) %>%
  mutate(pct=n/sum(n))
## # A tibble: 13 × 3
## weathercode n pct
## <int> <int> <dbl>
## 1 0 296257 0.487
## 2 1 126163 0.207
## 3 3 72391 0.119
## 4 2 48304 0.0793
## 5 51 32254 0.0530
## 6 53 10815 0.0178
## 7 61 7011 0.0115
## 8 63 4947 0.00813
## 9 71 3713 0.00610
## 10 55 3461 0.00569
## 11 73 2353 0.00387
## 12 75 564 0.000926
## 13 65 551 0.000905
Cloud cover and precipitation are highly predictive of weathercode, with most other variables having little explanatory power (accuracy near or even below the ~50% baseline for predicting everything as weathercode 0)
Select combinations are explored using the full training dataset:
# Candidate predictors for pairwise models fit on the full training dataset
possLargeWC <- c("precipitation", "rain", "snowfall", "cloudcover")
possLargeWC
# Enumerate every unordered pair of predictors up front -- utils::combn()
# yields the same (idx1, idx2) ordering as the original nested loop -- and
# fill a preallocated results matrix. This avoids the O(n^2) copying caused
# by calling rbind() inside the loop, and drops the no-op `[,]` subset.
pairIdxWC <- utils::combn(length(possLargeWC), 2)
mtxLargeWC <- matrix(NA_real_, nrow=ncol(pairIdxWC), ncol=3)
for(j in seq_len(ncol(pairIdxWC))) {
  idx1 <- pairIdxWC[1, j]
  idx2 <- pairIdxWC[2, j]
  # Holdout accuracy of a classification RF using just this predictor pair
  r2LargeWC <- runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
                         yVar="weathercode",
                         xVars=possLargeWC[c(idx1, idx2)],
                         dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                         useLabel=keyLabel,
                         useSub=stringr::str_to_sentence(keyLabel),
                         isContVar=FALSE,
                         mtry=2,
                         makePlots=FALSE,
                         returnData=TRUE
  )[["rfAcc"]]
  mtxLargeWC[j, ] <- c(idx1, idx2, r2LargeWC)
}
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.259%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.267%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.423%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.267%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.431%
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.626%
# Assemble the full-data pairwise accuracy results into a tibble, attaching
# the variable names for each index pair.
dfLargeR2WC <- mtxLargeWC %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1=possLargeWC[idx1],
         var2=possLargeWC[idx2],
         rn=row_number()
  )
# Rank predictor pairs by holdout accuracy
dfLargeR2WC %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n=20)
## # A tibble: 6 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 rain cloudcover 0.984
## 2 precipitation cloudcover 0.984
## 3 snowfall cloudcover 0.896
## 4 precipitation snowfall 0.623
## 5 rain snowfall 0.623
## 6 precipitation rain 0.623
Accuracy by type of weathercode is further explored for one subset (cloud cover and rain):
# Fit the two-predictor RF (cloud cover + rain) and break the holdout
# accuracy out by broad weathercode category.
runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          yVar="weathercode",
          xVars=c("cloudcover", "rain"),
          useLabel=keyLabel,
          useSub=stringr::str_to_sentence(keyLabel),
          isContVar=FALSE,
          mtry=2,
          makePlots=TRUE,
          returnData=TRUE
)[["tstPred"]] %>%
  # Categories are disjoint, so branch order in case_when is immaterial
  mutate(wcType=case_when(weathercode %in% c(71, 73, 75)~"Snow",
                          weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only",
                          weathercode %in% c(0, 1, 2, 3)~"No Precip",
                          TRUE~"Other"
  )
  ) %>%
  group_by(wcType) %>%
  summarize(acc=mean(weathercode==pred))
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 98.431%
## # A tibble: 3 × 2
## wcType acc
## <chr> <dbl>
## 1 No Precip 0.994
## 2 Rain only 1
## 3 Snow 0
The model with cloud cover and rain as predictors is very accurate at predicting weathercode, with the exception of zero accuracy during snowfall (71, 73, 75)
Accuracy by type of weathercode is further explored for another subset (cloud cover and snow):
# Fit the two-predictor RF (cloud cover + snowfall) and break the holdout
# accuracy out by broad weathercode category.
runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          yVar="weathercode",
          xVars=c("cloudcover", "snowfall"),
          useLabel=keyLabel,
          useSub=stringr::str_to_sentence(keyLabel),
          isContVar=FALSE,
          mtry=2,
          makePlots=TRUE,
          returnData=TRUE
)[["tstPred"]] %>%
  # Categories are disjoint, so branch order in case_when is immaterial
  mutate(wcType=case_when(weathercode %in% c(71, 73, 75)~"Snow",
                          weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only",
                          weathercode %in% c(0, 1, 2, 3)~"No Precip",
                          TRUE~"Other"
  )
  ) %>%
  group_by(wcType) %>%
  summarize(acc=mean(weathercode==pred))
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.626%
## # A tibble: 3 × 2
## wcType acc
## <chr> <dbl>
## 1 No Precip 0.994
## 2 Rain only 0
## 3 Snow 1
The model with cloud cover and snow as predictors is very accurate at predicting weathercode, with the exception of zero accuracy during rain events without snowfall (51, 53, 55, 61, 63, 65)
Accuracy by type of weathercode is further explored for a third subset (rain and snow):
# Fit the two-predictor RF (rain + snowfall, no cloud cover) and break the
# holdout accuracy out by broad weathercode category.
runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          yVar="weathercode",
          xVars=c("rain", "snowfall"),
          useLabel=keyLabel,
          useSub=stringr::str_to_sentence(keyLabel),
          isContVar=FALSE,
          mtry=2,
          makePlots=TRUE,
          returnData=TRUE
)[["tstPred"]] %>%
  # Categories are disjoint, so branch order in case_when is immaterial
  mutate(wcType=case_when(weathercode %in% c(71, 73, 75)~"Snow",
                          weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only",
                          weathercode %in% c(0, 1, 2, 3)~"No Precip",
                          TRUE~"Other"
  )
  ) %>%
  group_by(wcType) %>%
  summarize(acc=mean(weathercode==pred))
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.267%
## # A tibble: 3 × 2
## wcType acc
## <chr> <dbl>
## 1 No Precip 0.577
## 2 Rain only 1
## 3 Snow 1
The model with rain and snow as predictors is very accurate at predicting weathercode during precipitation events, but can do no better than predicting the null ‘0’ for no-precipitation observations (0, 1, 2, 3)
Accuracy by type of weathercode is explored for the three main predictors with mtry=1:
# Three-predictor RF (rain, snowfall, cloud cover) with mtry=1: each split
# considers only one randomly chosen predictor. Accuracy by wcType follows.
runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          yVar="weathercode",
          xVars=c("rain", "snowfall", "cloudcover"),
          useLabel=keyLabel,
          useSub=stringr::str_to_sentence(keyLabel),
          isContVar=FALSE,
          mtry=1,
          makePlots=TRUE,
          returnData=TRUE
)[["tstPred"]] %>%
  # Categories are disjoint, so branch order in case_when is immaterial
  mutate(wcType=case_when(weathercode %in% c(71, 73, 75)~"Snow",
                          weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only",
                          weathercode %in% c(0, 1, 2, 3)~"No Precip",
                          TRUE~"Other"
  )
  ) %>%
  group_by(wcType) %>%
  summarize(acc=mean(weathercode==pred))
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.328%
## # A tibble: 3 × 2
## wcType acc
## <chr> <dbl>
## 1 No Precip 0.790
## 2 Rain only 0.800
## 3 Snow 0.992
The model with three predictors and mtry=1 drives ~79% accuracy overall, lowest during “No Precip” (0.790) and “rain only” (0.800) events
Accuracy by type of weathercode is explored for the three main predictors with mtry=2:
# Three-predictor RF (rain, snowfall, cloud cover) with mtry=2: each split
# considers two candidate predictors. Accuracy by wcType follows.
runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          yVar="weathercode",
          xVars=c("rain", "snowfall", "cloudcover"),
          useLabel=keyLabel,
          useSub=stringr::str_to_sentence(keyLabel),
          isContVar=FALSE,
          mtry=2,
          makePlots=TRUE,
          returnData=TRUE
)[["tstPred"]] %>%
  # Categories are disjoint, so branch order in case_when is immaterial
  mutate(wcType=case_when(weathercode %in% c(71, 73, 75)~"Snow",
                          weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only",
                          weathercode %in% c(0, 1, 2, 3)~"No Precip",
                          TRUE~"Other"
  )
  ) %>%
  group_by(wcType) %>%
  summarize(acc=mean(weathercode==pred))
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.429%
## # A tibble: 3 × 2
## wcType acc
## <chr> <dbl>
## 1 No Precip 0.994
## 2 Rain only 1
## 3 Snow 0.992
The model with three predictors and mtry=2 drives ~99% accuracy, strong during all three event types
Accuracy by type of weathercode is explored for the three main predictors with mtry=3:
# Three-predictor RF (rain, snowfall, cloud cover) with mtry=3: every split
# considers all three predictors. Accuracy by wcType follows.
runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
          dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
          yVar="weathercode",
          xVars=c("rain", "snowfall", "cloudcover"),
          useLabel=keyLabel,
          useSub=stringr::str_to_sentence(keyLabel),
          isContVar=FALSE,
          mtry=3,
          makePlots=TRUE,
          returnData=TRUE
)[["tstPred"]] %>%
  # Categories are disjoint, so branch order in case_when is immaterial
  mutate(wcType=case_when(weathercode %in% c(71, 73, 75)~"Snow",
                          weathercode %in% c(51, 53, 55, 61, 63, 65)~"Rain only",
                          weathercode %in% c(0, 1, 2, 3)~"No Precip",
                          TRUE~"Other"
  )
  ) %>%
  group_by(wcType) %>%
  summarize(acc=mean(weathercode==pred))
##
## Accuracy of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.429%
## # A tibble: 3 × 2
## wcType acc
## <chr> <dbl>
## 1 No Precip 0.994
## 2 Rain only 1
## 3 Snow 0.992
The model with three predictors and mtry=3 drives the same accuracy as the model with three predictors and mtry=2
A model is run to predict ground-level wind speed, at first allowing high-level wind speed as a predictor:
# Full RF regression for ground-level wind speed: train on pre-2022 "train"
# rows, score on the 2022 "test" rows; every training variable except the
# target itself is allowed as a predictor (high-level wind speed included).
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfWindFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="windspeed_10m",
# Anchored regex removes only the exact target name from the candidates
xVars=c(varsTrain[!str_detect(varsTrain, "^windspeed_10m$")]),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
rndTo=-1L,
refXY=TRUE,
returnData=TRUE
)
## Growing trees.. Progress: 14%. Estimated remaining time: 3 minutes, 4 seconds.
## Growing trees.. Progress: 30%. Estimated remaining time: 2 minutes, 29 seconds.
## Growing trees.. Progress: 42%. Estimated remaining time: 2 minutes, 7 seconds.
## Growing trees.. Progress: 57%. Estimated remaining time: 1 minute, 35 seconds.
## Growing trees.. Progress: 72%. Estimated remaining time: 1 minute, 1 seconds.
## Growing trees.. Progress: 86%. Estimated remaining time: 30 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 97.23% (RMSE 1.26 vs. 7.58 null)
## `geom_smooth()` using formula = 'y ~ x'
The model is effective (~97% R-squared) at predicting ground-level wind speed, primarily by leveraging high-level wind speed and ground-level wind gusts
Correlations between predictors and ground wind speed are assessed:
# Pearson correlation of each training variable with windspeed_10m, shown as
# a sorted horizontal bar chart; only bars with |cor| > 0.2 get a printed
# label to keep the plot legible.
sapply(varsTrain, FUN=function(x) cor(allCity$windspeed_10m, allCity[[x]])) %>%
as.data.frame() %>%
rownames_to_column("var") %>%
tibble::as_tibble() %>%
purrr::set_names(c("var", "cor")) %>%
ggplot(aes(x=fct_reorder(var, cor), y=cor)) +
geom_col(fill="lightblue") +
geom_text(data=~filter(., abs(cor)>0.2), aes(y=cor/2, label=round(cor, 2)), size=2.5) +
coord_flip() +
labs(title="Correlation with ground wind speed (windspeed_10m)",
y="Correlation",
x=NULL
) +
lims(y=c(NA, 1))
# Scatter of ground wind speed vs. its two strongest predictors; values are
# rounded to integers and counted so point size (n) encodes density, and the
# lm smoother is weighted by those counts.
allCity %>%
select(windspeed_10m, windspeed_100m, windgusts_10m) %>%
mutate(across(.cols=everything(), .fns=function(x) round(x)), rn=row_number()) %>%
pivot_longer(cols=-c(rn, windspeed_10m)) %>%
count(windspeed_10m, name, value) %>%
ggplot(aes(x=value, y=windspeed_10m)) +
geom_point(aes(size=n), alpha=0.5) +
geom_smooth(aes(weight=n), method="lm") +
facet_wrap(~name) +
labs(x=NULL, title="Ground-level (10m) windspeed vs. two strong predictors")
## `geom_smooth()` using formula = 'y ~ x'
The linear model is run for ground wind speed, using all predictors:
# Eliminate diffuse radiation due to rank-deficiency
# Full linear model for ground wind speed on the pre-2022 training rows;
# weathercode is converted to a factor so each code gets its own coefficient.
lmWindFull <- lm(windspeed_10m ~ .,
data=allCity %>%
filter(tt=="train", year<2022) %>%
mutate(weathercode=factor(weathercode)) %>%
select(all_of(varsTrain)) %>%
select(-diffuse_radiation)
)
summary(lmWindFull)
##
## Call:
## lm(formula = windspeed_10m ~ ., data = allCity %>% filter(tt ==
## "train", year < 2022) %>% mutate(weathercode = factor(weathercode)) %>%
## select(all_of(varsTrain)) %>% select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4822 -0.9703 -0.0413 0.9710 9.3060
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.291e+02 1.630e+00 -79.217 < 2e-16 ***
## hour 1.240e-02 4.307e-04 28.797 < 2e-16 ***
## temperature_2m 4.805e-01 4.035e-03 119.067 < 2e-16 ***
## relativehumidity_2m 6.994e-03 4.113e-04 17.003 < 2e-16 ***
## dewpoint_2m 9.937e-02 1.359e-03 73.138 < 2e-16 ***
## apparent_temperature -5.625e-01 3.330e-03 -168.930 < 2e-16 ***
## pressure_msl 4.571e-02 6.078e-04 75.203 < 2e-16 ***
## surface_pressure -1.783e-02 2.712e-04 -65.756 < 2e-16 ***
## precipitation 3.449e+00 7.060e-01 4.884 1.04e-06 ***
## rain -3.465e+00 7.065e-01 -4.905 9.35e-07 ***
## snowfall -5.648e+00 1.023e+00 -5.520 3.39e-08 ***
## cloudcover 7.392e-03 4.485e-04 16.482 < 2e-16 ***
## cloudcover_low 1.615e-03 2.315e-04 6.976 3.05e-12 ***
## cloudcover_mid -5.238e-03 1.728e-04 -30.320 < 2e-16 ***
## cloudcover_high -2.226e-03 1.094e-04 -20.351 < 2e-16 ***
## shortwave_radiation 5.895e-03 6.310e-05 93.422 < 2e-16 ***
## direct_radiation -5.542e-03 6.760e-05 -81.991 < 2e-16 ***
## direct_normal_irradiance 9.342e-04 2.526e-05 36.982 < 2e-16 ***
## windspeed_100m 4.904e-01 6.476e-04 757.182 < 2e-16 ***
## winddirection_10m 5.565e-04 4.183e-05 13.303 < 2e-16 ***
## winddirection_100m -1.441e-04 4.220e-05 -3.414 0.00064 ***
## windgusts_10m 9.405e-02 5.358e-04 175.545 < 2e-16 ***
## et0_fao_evapotranspiration 3.908e+00 7.365e-02 53.065 < 2e-16 ***
## weathercode1 2.258e-01 1.099e-02 20.556 < 2e-16 ***
## weathercode2 2.755e-01 1.976e-02 13.943 < 2e-16 ***
## weathercode3 3.874e-01 2.602e-02 14.888 < 2e-16 ***
## weathercode51 4.848e-01 2.336e-02 20.758 < 2e-16 ***
## weathercode53 1.523e-01 3.095e-02 4.922 8.56e-07 ***
## weathercode55 7.405e-02 4.383e-02 1.689 0.09116 .
## weathercode61 6.336e-02 4.221e-02 1.501 0.13339
## weathercode63 -6.724e-02 6.944e-02 -0.968 0.33290
## weathercode65 5.492e-01 1.901e-01 2.889 0.00387 **
## weathercode71 1.761e+00 4.692e-02 37.528 < 2e-16 ***
## weathercode73 1.848e+00 6.804e-02 27.159 < 2e-16 ***
## weathercode75 1.846e+00 1.898e-01 9.725 < 2e-16 ***
## vapor_pressure_deficit -2.981e-01 8.028e-03 -37.135 < 2e-16 ***
## soil_temperature_0_to_7cm 9.211e-03 1.223e-03 7.529 5.12e-14 ***
## soil_temperature_7_to_28cm 1.158e-01 2.052e-03 56.439 < 2e-16 ***
## soil_temperature_28_to_100cm -8.989e-03 2.277e-03 -3.948 7.87e-05 ***
## soil_temperature_100_to_255cm -3.123e-03 1.173e-03 -2.662 0.00776 **
## soil_moisture_0_to_7cm 1.261e+00 7.054e-02 17.881 < 2e-16 ***
## soil_moisture_7_to_28cm -2.749e+00 1.003e-01 -27.406 < 2e-16 ***
## soil_moisture_28_to_100cm -4.102e+00 7.529e-02 -54.480 < 2e-16 ***
## soil_moisture_100_to_255cm 6.598e+00 7.354e-02 89.720 < 2e-16 ***
## year 4.777e-02 7.887e-04 60.564 < 2e-16 ***
## doy 2.214e-04 3.163e-05 7.001 2.55e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.546 on 368064 degrees of freedom
## Multiple R-squared: 0.9493, Adjusted R-squared: 0.9493
## F-statistic: 1.533e+05 on 45 and 368064 DF, p-value: < 2.2e-16
# Score lmWindFull on the 2022 holdout: model MSE (meModel) vs. the null
# mean-only MSE (meBase), then derive R-squared and RMSE from those two.
# weathercode must be re-factored to match the training data's levels.
allCity %>%
filter(tt=="test", year==2022) %>%
mutate(weathercode=factor(weathercode)) %>%
mutate(pred=predict(lmWindFull, newdata=.)) %>%
summarize(meModel=mean((pred-windspeed_10m)**2),
meBase=mean((windspeed_10m-mean(windspeed_10m))**2),
r2=1-meModel/meBase,
rmse=sqrt(meModel)
)
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 3.47 57.5 0.940 1.86
# Rank lmWindFull coefficients by absolute t value to surface the strongest
# predictors (sign of Estimate preserved for interpretation).
summary(lmWindFull)$coefficients %>%
as.data.frame() %>%
rownames_to_column("Variable") %>%
tibble::as_tibble() %>%
arrange(desc(abs(`t value`)))
## # A tibble: 46 × 5
## Variable Estimate `Std. Error` `t value` `Pr(>|t|)`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 windspeed_100m 0.490 0.000648 757. 0
## 2 windgusts_10m 0.0941 0.000536 176. 0
## 3 apparent_temperature -0.562 0.00333 -169. 0
## 4 temperature_2m 0.480 0.00404 119. 0
## 5 shortwave_radiation 0.00589 0.0000631 93.4 0
## 6 soil_moisture_100_to_255cm 6.60 0.0735 89.7 0
## 7 direct_radiation -0.00554 0.0000676 -82.0 0
## 8 (Intercept) -129. 1.63 -79.2 0
## 9 pressure_msl 0.0457 0.000608 75.2 0
## 10 dewpoint_2m 0.0994 0.00136 73.1 0
## # ℹ 36 more rows
Even with many confounders, the linear model largely identifies that high-level wind speed is a strong predictor for ground-level wind speed. Many other variables have statistically significant impact also, with wind gusts, apparent temperature, and actual temperature being of interest
The model is re-run using only the best four predictors:
# Best predictors only
# Reduced linear model keeping just the four highest-|t| predictors from the
# full fit; same pre-2022 training filter as lmWindFull.
lmWindFour <- lm(windspeed_10m ~ windspeed_100m + windgusts_10m + apparent_temperature + temperature_2m,
data=allCity %>%
filter(tt=="train", year<2022) %>%
select(all_of(varsTrain)) %>%
select(-diffuse_radiation)
)
summary(lmWindFour)
##
## Call:
## lm(formula = windspeed_10m ~ windspeed_100m + windgusts_10m +
## apparent_temperature + temperature_2m, data = allCity %>%
## filter(tt == "train", year < 2022) %>% select(all_of(varsTrain)) %>%
## select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.8387 -1.1335 0.0315 1.1358 10.4214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.5525758 0.0093847 -165.4 <2e-16 ***
## windspeed_100m 0.4445631 0.0005079 875.4 <2e-16 ***
## windgusts_10m 0.1732658 0.0004583 378.1 <2e-16 ***
## apparent_temperature -0.1764342 0.0014401 -122.5 <2e-16 ***
## temperature_2m 0.2140689 0.0017273 123.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.863 on 368105 degrees of freedom
## Multiple R-squared: 0.9264, Adjusted R-squared: 0.9264
## F-statistic: 1.159e+06 on 4 and 368105 DF, p-value: < 2.2e-16
# Score lmWindFour on the 2022 holdout with the same MSE-based R-squared and
# RMSE computation used for the full model.
allCity %>%
filter(tt=="test", year==2022) %>%
mutate(pred=predict(lmWindFour, newdata=.)) %>%
summarize(meModel=mean((pred-windspeed_10m)**2),
meBase=mean((windspeed_10m-mean(windspeed_10m))**2),
r2=1-meModel/meBase,
rmse=sqrt(meModel)
)
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 5.01 57.5 0.913 2.24
The four-predictor model retains most, but not all, of the explanatory power of the full model. R2 on the test dataset falls from ~94% to ~91% (RMSE increases from ~1.9 to ~2.2)
Each of the best-four predictors is run individually, with metrics assessed on training data:
# One single-predictor lm per best-four variable, reporting residual sigma
# and R-squared. Note these fit on all of allCity (no train/test split), so
# the metrics are in-sample.
sapply(c("windspeed_100m", "windgusts_10m", "apparent_temperature", "temperature_2m"),
FUN=function(x) summary(lm(windspeed_10m ~ get(x), data=allCity))[c("sigma", "r.squared")] %>% unlist
) %>%
t()
## sigma r.squared
## windspeed_100m 2.389386 0.88251896
## windgusts_10m 3.427561 0.75825046
## apparent_temperature 6.835019 0.03866518
## temperature_2m 6.916588 0.01558339
High-level wind speed and wind gusts have good standalone predictive power on ground level wind speed. Temperature and apparent temperature have essentially no standalone predictive power on ground level wind speed
Each variable is run through the random forest standalone, using a smaller training dataset:
# Variables to explore
# Candidate standalone predictors: everything except the target, plus the
# city identifier (src) and todSeason (presumably a combined time-of-day /
# season factor -- confirm against its upstream definition).
useWind <- c(varsTrain[varsTrain!="windspeed_10m"], "src", "todSeason")
useWind
## [1] "hour" "temperature_2m"
## [3] "relativehumidity_2m" "dewpoint_2m"
## [5] "apparent_temperature" "pressure_msl"
## [7] "surface_pressure" "precipitation"
## [9] "rain" "snowfall"
## [11] "cloudcover" "cloudcover_low"
## [13] "cloudcover_mid" "cloudcover_high"
## [15] "shortwave_radiation" "direct_radiation"
## [17] "direct_normal_irradiance" "diffuse_radiation"
## [19] "windspeed_100m" "winddirection_10m"
## [21] "winddirection_100m" "windgusts_10m"
## [23] "et0_fao_evapotranspiration" "weathercode"
## [25] "vapor_pressure_deficit" "soil_temperature_0_to_7cm"
## [27] "soil_temperature_7_to_28cm" "soil_temperature_28_to_100cm"
## [29] "soil_temperature_100_to_255cm" "soil_moisture_0_to_7cm"
## [31] "soil_moisture_7_to_28cm" "soil_moisture_28_to_100cm"
## [33] "soil_moisture_100_to_255cm" "year"
## [35] "doy" "src"
## [37] "todSeason"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
# Fixed seed, then a 5000-row sample of the training data to keep the
# per-variable forests fast. NOTE(review): seq_len(nrow(...)) would be the
# safer idiom than 1:nrow(...); harmless here since dfTrainCloud is
# non-empty. mtxSmallWind appears unused by the map_dfr below -- possibly
# leftover scaffolding from an earlier loop-based version.
set.seed(24091314)
idxSmallWind <- sample(1:nrow(dfTrainCloud), 5000, replace=FALSE)
mtxSmallWind <- matrix(nrow=0, ncol=3)
# Map each variable to file
# Fit one single-predictor RF per candidate variable on the 5000-row sample
# and stack each run's accuracy vector ("rfAcc") into one tibble; .id
# records the list position, which is mapped back to the variable name in
# the final mutate.
rfWindOneSmall <- map_dfr(.x=useWind,
.f=function(x) runFullRF(dfTrain=dfTrainCloud[idxSmallWind,],
yVar="windspeed_10m",
xVars=x,
dfTest=dfTestCloud,
isContVar=TRUE,
makePlots=FALSE,
returnData=TRUE
)[["rfAcc"]] %>%
t() %>%
as_tibble(),
.id="varNum"
) %>%
mutate(varName=useWind[as.numeric(varNum)])
##
## R-squared of test data is: -0.615% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of test data is: -5.015% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of test data is: -0.827% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of test data is: -7.341% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of test data is: -2.292% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of test data is: -4.177% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of test data is: 8.616% (RMSE 7.25 vs. 7.58 null)
##
## R-squared of test data is: -0.975% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of test data is: -1.665% (RMSE 7.65 vs. 7.58 null)
##
## R-squared of test data is: -1.356% (RMSE 7.64 vs. 7.58 null)
##
## R-squared of test data is: 0.054% (RMSE 7.58 vs. 7.58 null)
##
## R-squared of test data is: -1.597% (RMSE 7.64 vs. 7.58 null)
##
## R-squared of test data is: -0.565% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of test data is: -2.852% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of test data is: -13.837% (RMSE 8.09 vs. 7.58 null)
##
## R-squared of test data is: -11.645% (RMSE 8.01 vs. 7.58 null)
##
## R-squared of test data is: -15.374% (RMSE 8.15 vs. 7.58 null)
##
## R-squared of test data is: -5.057% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of test data is: 86.606% (RMSE 2.78 vs. 7.58 null)
##
## R-squared of test data is: -0.053% (RMSE 7.59 vs. 7.58 null)
##
## R-squared of test data is: -2.124% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of test data is: 73.62% (RMSE 3.9 vs. 7.58 null)
##
## R-squared of test data is: 5.661% (RMSE 7.37 vs. 7.58 null)
##
## R-squared of test data is: 1.555% (RMSE 7.53 vs. 7.58 null)
##
## R-squared of test data is: -7.35% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of test data is: -6.337% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of test data is: -4.04% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of test data is: 1.29% (RMSE 7.54 vs. 7.58 null)
##
## R-squared of test data is: -0.911% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of test data is: -5.933% (RMSE 7.81 vs. 7.58 null)
##
## R-squared of test data is: -10.608% (RMSE 7.98 vs. 7.58 null)
##
## R-squared of test data is: -12.566% (RMSE 8.05 vs. 7.58 null)
##
## R-squared of test data is: 12.929% (RMSE 7.08 vs. 7.58 null)
##
## R-squared of test data is: -0.394% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of test data is: -5.141% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of test data is: 17.812% (RMSE 6.88 vs. 7.58 null)
##
## R-squared of test data is: 1.129% (RMSE 7.54 vs. 7.58 null)
rfWindOneSmall %>% arrange(desc(r2))
## # A tibble: 37 × 5
## varNum mseNull msePred r2 varName
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 19 57.5 7.70 0.866 windspeed_100m
## 2 22 57.5 15.2 0.736 windgusts_10m
## 3 36 57.5 47.3 0.178 src
## 4 33 57.5 50.1 0.129 soil_moisture_100_to_255cm
## 5 7 57.5 52.6 0.0862 surface_pressure
## 6 23 57.5 54.3 0.0566 et0_fao_evapotranspiration
## 7 24 57.5 56.6 0.0155 weathercode
## 8 28 57.5 56.8 0.0129 soil_temperature_28_to_100cm
## 9 37 57.5 56.9 0.0113 todSeason
## 10 11 57.5 57.5 0.000541 cloudcover
## # ℹ 27 more rows
The random forest model identifies the same standalone best predictors, and with very similar R2
A null model is run to predict ground-level wind speed, using only city, month, and day/night (mtry=3):
# "Null" RF baseline: predict ground wind speed from city, month, and
# day/night only; mtry=3 means all three candidates are considered at every
# split.
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
rfWindNull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="windspeed_10m",
xVars=c("src", "month", "tod"),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
mtry=3,
isContVar=TRUE,
rndTo=-1L,
refXY=TRUE,
returnData=TRUE
)
## Growing trees.. Progress: 74%. Estimated remaining time: 10 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.22% (RMSE 6.56 vs. 7.58 null)
## `geom_smooth()` using formula = 'y ~ x'
The null model is minimally effective (~25% R-squared) at predicting ground-level wind speed
The same null model is run using lm:
# Null predictors only
# lm counterpart of the null RF: one fitted mean per src x month x tod cell
# (three-way interaction with the intercept suppressed via + 0), which is
# the same family of piecewise-constant predictions the RF can express.
lmWindNull <- lm(windspeed_10m ~ src:month:tod + 0,
data=allCity %>%
filter(tt=="train", year<2022)
)
summary(lmWindNull)
##
## Call:
## lm(formula = windspeed_10m ~ src:month:tod + 0, data = allCity %>%
## filter(tt == "train", year < 2022))
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.907 -3.906 -0.854 3.003 58.185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## srcChicago:monthJan:todDay 17.6661 0.1059 166.80 <2e-16 ***
## srcHouston:monthJan:todDay 14.6756 0.1049 139.93 <2e-16 ***
## srcLA:monthJan:todDay 8.3357 0.1053 79.14 <2e-16 ***
## srcNYC:monthJan:todDay 12.9985 0.1051 123.70 <2e-16 ***
## srcVegas:monthJan:todDay 7.4405 0.1052 70.71 <2e-16 ***
## srcChicago:monthFeb:todDay 17.4299 0.1101 158.36 <2e-16 ***
## srcHouston:monthFeb:todDay 15.2828 0.1107 138.02 <2e-16 ***
## srcLA:monthFeb:todDay 8.1749 0.1102 74.19 <2e-16 ***
## srcNYC:monthFeb:todDay 12.5546 0.1106 113.54 <2e-16 ***
## srcVegas:monthFeb:todDay 9.6153 0.1108 86.80 <2e-16 ***
## srcChicago:monthMar:todDay 17.3240 0.1038 166.86 <2e-16 ***
## srcHouston:monthMar:todDay 15.3995 0.1056 145.90 <2e-16 ***
## srcLA:monthMar:todDay 8.2840 0.1048 79.02 <2e-16 ***
## srcNYC:monthMar:todDay 13.9386 0.1048 132.99 <2e-16 ***
## srcVegas:monthMar:todDay 11.1745 0.1040 107.50 <2e-16 ***
## srcChicago:monthApr:todDay 18.3074 0.1067 171.50 <2e-16 ***
## srcHouston:monthApr:todDay 16.1890 0.1073 150.83 <2e-16 ***
## srcLA:monthApr:todDay 9.0709 0.1065 85.20 <2e-16 ***
## srcNYC:monthApr:todDay 13.4615 0.1075 125.25 <2e-16 ***
## srcVegas:monthApr:todDay 12.3777 0.1071 115.55 <2e-16 ***
## srcChicago:monthMay:todDay 16.5924 0.1050 157.98 <2e-16 ***
## srcHouston:monthMay:todDay 14.9801 0.1060 141.37 <2e-16 ***
## srcLA:monthMay:todDay 8.8806 0.1048 84.70 <2e-16 ***
## srcNYC:monthMay:todDay 11.6595 0.1055 110.50 <2e-16 ***
## srcVegas:monthMay:todDay 12.2198 0.1054 115.96 <2e-16 ***
## srcChicago:monthJun:todDay 14.8766 0.1069 139.11 <2e-16 ***
## srcHouston:monthJun:todDay 11.9026 0.1067 111.52 <2e-16 ***
## srcLA:monthJun:todDay 8.1789 0.1072 76.32 <2e-16 ***
## srcNYC:monthJun:todDay 10.8856 0.1069 101.81 <2e-16 ***
## srcVegas:monthJun:todDay 12.0571 0.1074 112.31 <2e-16 ***
## srcChicago:monthJul:todDay 13.2059 0.1051 125.64 <2e-16 ***
## srcHouston:monthJul:todDay 10.0919 0.1045 96.59 <2e-16 ***
## srcLA:monthJul:todDay 7.6432 0.1056 72.40 <2e-16 ***
## srcNYC:monthJul:todDay 9.6149 0.1044 92.06 <2e-16 ***
## srcVegas:monthJul:todDay 10.4480 0.1056 98.97 <2e-16 ***
## srcChicago:monthAug:todDay 12.5962 0.1046 120.43 <2e-16 ***
## srcHouston:monthAug:todDay 9.9503 0.1053 94.50 <2e-16 ***
## srcLA:monthAug:todDay 7.4806 0.1046 71.51 <2e-16 ***
## srcNYC:monthAug:todDay 9.4848 0.1041 91.10 <2e-16 ***
## srcVegas:monthAug:todDay 10.0057 0.1047 95.57 <2e-16 ***
## srcChicago:monthSep:todDay 15.2389 0.1064 143.25 <2e-16 ***
## srcHouston:monthSep:todDay 11.0266 0.1064 103.59 <2e-16 ***
## srcLA:monthSep:todDay 7.2291 0.1065 67.89 <2e-16 ***
## srcNYC:monthSep:todDay 10.5536 0.1061 99.47 <2e-16 ***
## srcVegas:monthSep:todDay 9.2257 0.1068 86.37 <2e-16 ***
## srcChicago:monthOct:todDay 17.6309 0.1051 167.76 <2e-16 ***
## srcHouston:monthOct:todDay 12.2793 0.1049 117.08 <2e-16 ***
## srcLA:monthOct:todDay 7.3985 0.1051 70.41 <2e-16 ***
## srcNYC:monthOct:todDay 12.3331 0.1051 117.32 <2e-16 ***
## srcVegas:monthOct:todDay 8.4465 0.1051 80.36 <2e-16 ***
## srcChicago:monthNov:todDay 18.4158 0.1068 172.46 <2e-16 ***
## srcHouston:monthNov:todDay 13.5093 0.1067 126.64 <2e-16 ***
## srcLA:monthNov:todDay 7.4528 0.1070 69.68 <2e-16 ***
## srcNYC:monthNov:todDay 12.3604 0.1072 115.35 <2e-16 ***
## srcVegas:monthNov:todDay 7.7986 0.1063 73.33 <2e-16 ***
## srcChicago:monthDec:todDay 17.2362 0.1044 165.16 <2e-16 ***
## srcHouston:monthDec:todDay 14.1162 0.1057 133.61 <2e-16 ***
## srcLA:monthDec:todDay 8.1803 0.1055 77.56 <2e-16 ***
## srcNYC:monthDec:todDay 11.9286 0.1046 113.99 <2e-16 ***
## srcVegas:monthDec:todDay 7.4219 0.1048 70.84 <2e-16 ***
## srcChicago:monthJan:todNight 17.6431 0.1052 167.69 <2e-16 ***
## srcHouston:monthJan:todNight 12.5177 0.1054 118.81 <2e-16 ***
## srcLA:monthJan:todNight 8.2679 0.1062 77.86 <2e-16 ***
## srcNYC:monthJan:todNight 12.0810 0.1052 114.88 <2e-16 ***
## srcVegas:monthJan:todNight 6.7663 0.1062 63.70 <2e-16 ***
## srcChicago:monthFeb:todNight 17.3093 0.1102 157.04 <2e-16 ***
## srcHouston:monthFeb:todNight 13.2973 0.1107 120.09 <2e-16 ***
## srcLA:monthFeb:todNight 6.9601 0.1098 63.39 <2e-16 ***
## srcNYC:monthFeb:todNight 11.9201 0.1107 107.65 <2e-16 ***
## srcVegas:monthFeb:todNight 8.0001 0.1097 72.92 <2e-16 ***
## srcChicago:monthMar:todNight 16.3501 0.1054 155.18 <2e-16 ***
## srcHouston:monthMar:todNight 12.9646 0.1048 123.68 <2e-16 ***
## srcLA:monthMar:todNight 6.0397 0.1047 57.66 <2e-16 ***
## srcNYC:monthMar:todNight 12.5029 0.1057 118.30 <2e-16 ***
## srcVegas:monthMar:todNight 8.9150 0.1056 84.45 <2e-16 ***
## srcChicago:monthApr:todNight 16.5691 0.1069 154.94 <2e-16 ***
## srcHouston:monthApr:todNight 13.2652 0.1073 123.61 <2e-16 ***
## srcLA:monthApr:todNight 5.8407 0.1061 55.07 <2e-16 ***
## srcNYC:monthApr:todNight 11.8948 0.1070 111.17 <2e-16 ***
## srcVegas:monthApr:todNight 9.9145 0.1059 93.61 <2e-16 ***
## srcChicago:monthMay:todNight 14.9166 0.1046 142.62 <2e-16 ***
## srcHouston:monthMay:todNight 12.4708 0.1049 118.93 <2e-16 ***
## srcLA:monthMay:todNight 4.5259 0.1044 43.33 <2e-16 ***
## srcNYC:monthMay:todNight 10.3662 0.1047 99.05 <2e-16 ***
## srcVegas:monthMay:todNight 9.9005 0.1054 93.97 <2e-16 ***
## srcChicago:monthJun:todNight 13.6958 0.1067 128.34 <2e-16 ***
## srcHouston:monthJun:todNight 10.3851 0.1064 97.62 <2e-16 ***
## srcLA:monthJun:todNight 3.9178 0.1072 36.56 <2e-16 ***
## srcNYC:monthJun:todNight 9.6970 0.1073 90.39 <2e-16 ***
## srcVegas:monthJun:todNight 9.7362 0.1066 91.34 <2e-16 ***
## srcChicago:monthJul:todNight 12.0225 0.1048 114.69 <2e-16 ***
## srcHouston:monthJul:todNight 9.6380 0.1054 91.42 <2e-16 ***
## srcLA:monthJul:todNight 3.7239 0.1053 35.36 <2e-16 ***
## srcNYC:monthJul:todNight 8.9811 0.1053 85.25 <2e-16 ***
## srcVegas:monthJul:todNight 8.3553 0.1046 79.87 <2e-16 ***
## srcChicago:monthAug:todNight 11.3610 0.1046 108.62 <2e-16 ***
## srcHouston:monthAug:todNight 9.8231 0.1051 93.50 <2e-16 ***
## srcLA:monthAug:todNight 3.4544 0.1060 32.60 <2e-16 ***
## srcNYC:monthAug:todNight 8.4208 0.1053 79.96 <2e-16 ***
## srcVegas:monthAug:todNight 7.9305 0.1046 75.83 <2e-16 ***
## srcChicago:monthSep:todNight 14.4814 0.1073 134.99 <2e-16 ***
## srcHouston:monthSep:todNight 9.2157 0.1074 85.85 <2e-16 ***
## srcLA:monthSep:todNight 4.0097 0.1066 37.62 <2e-16 ***
## srcNYC:monthSep:todNight 9.6622 0.1069 90.40 <2e-16 ***
## srcVegas:monthSep:todNight 7.7219 0.1060 72.84 <2e-16 ***
## srcChicago:monthOct:todNight 16.6812 0.1050 158.88 <2e-16 ***
## srcHouston:monthOct:todNight 10.8022 0.1050 102.90 <2e-16 ***
## srcLA:monthOct:todNight 5.5429 0.1047 52.92 <2e-16 ***
## srcNYC:monthOct:todNight 11.4524 0.1044 109.69 <2e-16 ***
## srcVegas:monthOct:todNight 7.6152 0.1049 72.62 <2e-16 ***
## srcChicago:monthNov:todNight 17.9773 0.1070 168.02 <2e-16 ***
## srcHouston:monthNov:todNight 11.8271 0.1071 110.45 <2e-16 ***
## srcLA:monthNov:todNight 6.8622 0.1071 64.06 <2e-16 ***
## srcNYC:monthNov:todNight 11.7894 0.1073 109.89 <2e-16 ***
## srcVegas:monthNov:todNight 7.2725 0.1074 67.72 <2e-16 ***
## srcChicago:monthDec:todNight 16.9604 0.1048 161.90 <2e-16 ***
## srcHouston:monthDec:todNight 12.7281 0.1055 120.67 <2e-16 ***
## srcLA:monthDec:todNight 7.9781 0.1060 75.26 <2e-16 ***
## srcNYC:monthDec:todNight 11.6663 0.1050 111.15 <2e-16 ***
## srcVegas:monthDec:todNight 7.1803 0.1044 68.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.874 on 367990 degrees of freedom
## Multiple R-squared: 0.7978, Adjusted R-squared: 0.7977
## F-statistic: 1.21e+04 on 120 and 367990 DF, p-value: < 2.2e-16
# Score lmWindNull on the 2022 holdout with the same MSE-based R-squared and
# RMSE computation used for the earlier models.
allCity %>%
filter(tt=="test", year==2022) %>%
mutate(pred=predict(lmWindNull, newdata=.)) %>%
summarize(meModel=mean((pred-windspeed_10m)**2),
meBase=mean((windspeed_10m-mean(windspeed_10m))**2),
r2=1-meModel/meBase,
rmse=sqrt(meModel)
)
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 43.0 57.5 0.252 6.56
As expected, the models drive nearly identical results and R-squared
A model is run to predict ground-level wind speed, excluding high-level wind speed and gusts as predictors:
# RF without direct wind predictors: the regex "^wind[sg]" drops
# windspeed_* and windgusts_* but deliberately keeps winddirection_*;
# src/month/tod are added as context predictors.
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="windspeed_10m",
xVars=c(varsTrain[!str_detect(varsTrain, "^wind[sg]")], "src", "month", "tod"),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
rndTo=-1L,
refXY=TRUE,
returnData=FALSE
)
## Growing trees.. Progress: 8%. Estimated remaining time: 5 minutes, 38 seconds.
## Growing trees.. Progress: 17%. Estimated remaining time: 5 minutes, 3 seconds.
## Growing trees.. Progress: 27%. Estimated remaining time: 4 minutes, 11 seconds.
## Growing trees.. Progress: 39%. Estimated remaining time: 3 minutes, 17 seconds.
## Growing trees.. Progress: 52%. Estimated remaining time: 2 minutes, 21 seconds.
## Growing trees.. Progress: 66%. Estimated remaining time: 1 minute, 38 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 56 seconds.
## Growing trees.. Progress: 93%. Estimated remaining time: 17 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.684% (RMSE 4.11 vs. 7.58 null)
## `geom_smooth()` using formula = 'y ~ x'
The model is reasonably effective (~70% R-squared) at predicting ground-level wind speed, by leveraging multiple predictors in combination
The linear model is re-run for ground wind speed, excluding wind gusts and high-level wind speed:
# Eliminate diffuse radiation due to rank-deficiency
# Linear counterpart of the no-wind RF: drop windgusts_10m and
# windspeed_100m as predictors while keeping src/month/tod; weathercode is
# again converted to a factor.
lmWindNoWG <- lm(windspeed_10m ~ .,
data=allCity %>%
filter(tt=="train", year<2022) %>%
mutate(weathercode=factor(weathercode)) %>%
select(all_of(varsTrain), src, month, tod) %>%
select(-diffuse_radiation, -windgusts_10m, -windspeed_100m)
)
summary(lmWindNoWG)
##
## Call:
## lm(formula = windspeed_10m ~ ., data = allCity %>% filter(tt ==
## "train", year < 2022) %>% mutate(weathercode = factor(weathercode)) %>%
## select(all_of(varsTrain), src, month, tod) %>% select(-diffuse_radiation,
## -windgusts_10m, -windspeed_100m))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5453 -2.0461 -0.2352 1.8298 25.8615
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.794e+01 3.444e+00 -25.530 < 2e-16 ***
## hour -5.290e-03 8.995e-04 -5.881 4.09e-09 ***
## temperature_2m 4.215e+00 7.368e-03 572.153 < 2e-16 ***
## relativehumidity_2m 4.803e-02 8.794e-04 54.620 < 2e-16 ***
## dewpoint_2m 2.985e-01 2.848e-03 104.818 < 2e-16 ***
## apparent_temperature -3.856e+00 5.570e-03 -692.312 < 2e-16 ***
## pressure_msl 7.602e-01 8.150e-03 93.274 < 2e-16 ***
## surface_pressure -8.658e-01 8.277e-03 -104.604 < 2e-16 ***
## precipitation -5.503e+00 1.475e+00 -3.731 0.000190 ***
## rain 5.736e+00 1.476e+00 3.886 0.000102 ***
## snowfall 1.022e+01 2.137e+00 4.780 1.75e-06 ***
## cloudcover 1.580e-02 9.370e-04 16.862 < 2e-16 ***
## cloudcover_low -4.887e-03 4.837e-04 -10.104 < 2e-16 ***
## cloudcover_mid -1.365e-02 3.621e-04 -37.703 < 2e-16 ***
## cloudcover_high -1.396e-03 2.287e-04 -6.105 1.03e-09 ***
## shortwave_radiation -1.250e-02 1.369e-04 -91.338 < 2e-16 ***
## direct_radiation 7.379e-03 1.589e-04 46.428 < 2e-16 ***
## direct_normal_irradiance -2.042e-03 5.836e-05 -34.997 < 2e-16 ***
## winddirection_10m -2.326e-04 8.770e-05 -2.653 0.007979 **
## winddirection_100m 2.842e-03 8.837e-05 32.159 < 2e-16 ***
## et0_fao_evapotranspiration 2.837e+01 1.527e-01 185.744 < 2e-16 ***
## weathercode1 1.826e-01 2.295e-02 7.958 1.76e-15 ***
## weathercode2 1.877e-01 4.127e-02 4.548 5.41e-06 ***
## weathercode3 -8.981e-02 5.437e-02 -1.652 0.098590 .
## weathercode51 1.195e+00 4.870e-02 24.530 < 2e-16 ***
## weathercode53 1.034e+00 6.451e-02 16.029 < 2e-16 ***
## weathercode55 1.104e+00 9.142e-02 12.073 < 2e-16 ***
## weathercode61 1.255e+00 8.802e-02 14.264 < 2e-16 ***
## weathercode63 1.377e+00 1.449e-01 9.509 < 2e-16 ***
## weathercode65 8.559e-01 3.970e-01 2.156 0.031092 *
## weathercode71 1.541e+00 9.822e-02 15.692 < 2e-16 ***
## weathercode73 1.471e+00 1.422e-01 10.343 < 2e-16 ***
## weathercode75 9.751e-01 3.964e-01 2.460 0.013910 *
## vapor_pressure_deficit -3.853e+00 1.831e-02 -210.414 < 2e-16 ***
## soil_temperature_0_to_7cm 2.012e-01 2.594e-03 77.579 < 2e-16 ***
## soil_temperature_7_to_28cm 2.356e-01 4.389e-03 53.679 < 2e-16 ***
## soil_temperature_28_to_100cm -2.907e-01 5.390e-03 -53.940 < 2e-16 ***
## soil_temperature_100_to_255cm -4.260e-02 4.254e-03 -10.014 < 2e-16 ***
## soil_moisture_0_to_7cm 9.197e+00 1.513e-01 60.780 < 2e-16 ***
## soil_moisture_7_to_28cm -9.172e+00 2.142e-01 -42.823 < 2e-16 ***
## soil_moisture_28_to_100cm 1.344e+00 1.629e-01 8.254 < 2e-16 ***
## soil_moisture_100_to_255cm -1.393e+00 2.364e-01 -5.891 3.85e-09 ***
## year 8.494e-02 1.680e-03 50.569 < 2e-16 ***
## doy 5.569e-03 6.054e-04 9.199 < 2e-16 ***
## srcHouston 1.927e+01 1.650e-01 116.744 < 2e-16 ***
## srcLA -2.018e+01 1.493e-01 -135.132 < 2e-16 ***
## srcNYC 1.319e+01 1.455e-01 90.657 < 2e-16 ***
## srcVegas -5.298e+01 4.716e-01 -112.350 < 2e-16 ***
## monthFeb -5.999e-01 3.345e-02 -17.934 < 2e-16 ***
## monthMar -1.341e+00 4.800e-02 -27.950 < 2e-16 ***
## monthApr -1.825e+00 6.593e-02 -27.686 < 2e-16 ***
## monthMay -1.265e+00 8.379e-02 -15.097 < 2e-16 ***
## monthJun 4.304e-01 1.026e-01 4.195 2.72e-05 ***
## monthJul 2.428e+00 1.206e-01 20.135 < 2e-16 ***
## monthAug 2.176e+00 1.375e-01 15.823 < 2e-16 ***
## monthSep 9.205e-01 1.536e-01 5.991 2.08e-09 ***
## monthOct -1.032e+00 1.696e-01 -6.083 1.18e-09 ***
## monthNov -1.640e+00 1.866e-01 -8.792 < 2e-16 ***
## monthDec -1.907e+00 2.042e-01 -9.341 < 2e-16 ***
## todNight 9.624e-02 1.877e-02 5.129 2.92e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.229 on 368050 degrees of freedom
## Multiple R-squared: 0.7791, Adjusted R-squared: 0.7791
## F-statistic: 2.2e+04 on 59 and 368050 DF, p-value: < 2.2e-16
# Score lmWindNoWG on the 2022 holdout; weathercode must be re-factored so
# predict() can match the training factor levels.
allCity %>%
filter(tt=="test", year==2022) %>%
mutate(weathercode=factor(weathercode)) %>%
mutate(pred=predict(lmWindNoWG, newdata=.)) %>%
summarize(meModel=mean((pred-windspeed_10m)**2),
meBase=mean((windspeed_10m-mean(windspeed_10m))**2),
r2=1-meModel/meBase,
rmse=sqrt(meModel)
)
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 12.3 57.5 0.787 3.50
# Rank lmWindNoWG coefficients by absolute t value to surface the strongest
# remaining predictors once wind speed/gust variables are excluded.
summary(lmWindNoWG)$coefficients %>%
as.data.frame() %>%
rownames_to_column("Variable") %>%
tibble::as_tibble() %>%
arrange(desc(abs(`t value`)))
## # A tibble: 60 × 5
## Variable Estimate `Std. Error` `t value` `Pr(>|t|)`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 apparent_temperature -3.86 0.00557 -692. 0
## 2 temperature_2m 4.22 0.00737 572. 0
## 3 vapor_pressure_deficit -3.85 0.0183 -210. 0
## 4 et0_fao_evapotranspiration 28.4 0.153 186. 0
## 5 srcLA -20.2 0.149 -135. 0
## 6 srcHouston 19.3 0.165 117. 0
## 7 srcVegas -53.0 0.472 -112. 0
## 8 dewpoint_2m 0.299 0.00285 105. 0
## 9 surface_pressure -0.866 0.00828 -105. 0
## 10 pressure_msl 0.760 0.00815 93.3 0
## # ℹ 50 more rows
Even with many confounders, the linear model performs slightly better than the random forest, driving RMSE down to ~3.5 and R2 up to ~80%
All combinations of two variables are explored for predicting ground wind speed on a smaller training dataset:
# Train and test data created previously (dfTrainCloud and dfTestCloud)
# Variables to explore
# Candidate predictors for the pairwise search: everything but the target,
# plus month/tod/src. The unanchored pattern "windspeed_10m" happens to be
# safe here because it is not a substring of windspeed_100m.
possWSVars <- c(varsTrain[!str_detect(varsTrain, "windspeed_10m")], "month", "tod", "src")
# Subsets to use
# Fixed seed, a fresh 5000-row training sample, and an empty 3-column matrix
# to accumulate (idx1, idx2, r2) results from the loop below.
set.seed(24091816)
idxSmallWS <- sample(1:nrow(dfTrainCloud), 5000, replace=FALSE)
mtxSmallWS <- matrix(nrow=0, ncol=3)
# Exhaustive pairwise search: fit an RF (mtry=2) for every unordered pair of
# candidate predictors on the 5000-row sample and record the holdout
# R-squared for each pair. NOTE(review): growing mtxSmallWS with rbind()
# inside the loop copies the matrix on every iteration (O(n^2));
# preallocating rows -- or building a list and binding once -- would be the
# idiomatic fix. Left unchanged to match the recorded output.
for(idx1 in 1:(length(possWSVars)-1)) {
for(idx2 in (idx1+1):length(possWSVars)) {
r2SmallWS <- runFullRF(dfTrain=dfTrainCloud[idxSmallWS,] %>% mutate(weathercode=factor(weathercode)),
yVar="windspeed_10m",
xVars=possWSVars[c(idx1, idx2)],
dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
mtry=2,
makePlots=FALSE,
returnData=TRUE
)[["rfAcc"]][["r2"]]
# Append one (variable index 1, variable index 2, r2) row per pair
mtxSmallWS <- rbind(mtxSmallWS, c(idx1, idx2, r2SmallWS))
}
}
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.134% (RMSE 7.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.79% (RMSE 8.13 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.646% (RMSE 8.05 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.312% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.339% (RMSE 8.04 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.449% (RMSE 7.14 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.523% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.982% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.631% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.647% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.585% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.835% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.447% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.354% (RMSE 7.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.263% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.186% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.707% (RMSE 7.91 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.454% (RMSE 2.34 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.985% (RMSE 8.03 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.158% (RMSE 8.03 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.489% (RMSE 3.91 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.736% (RMSE 7.44 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.265% (RMSE 7.54 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.43% (RMSE 8.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.245% (RMSE 8.04 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.296% (RMSE 7.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.717% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.809% (RMSE 7.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.478% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.043% (RMSE 7.99 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.111% (RMSE 8.07 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.988% (RMSE 7.28 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.53% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.356% (RMSE 8.08 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.229% (RMSE 7.58 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.666% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.455% (RMSE 6.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.062% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.988% (RMSE 7.81 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.167% (RMSE 5.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.159% (RMSE 7.63 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.273% (RMSE 7.1 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.942% (RMSE 7.7 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.536% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.046% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.195% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.86% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.009% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.358% (RMSE 7.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.266% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.774% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.13% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.126% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.59% (RMSE 2.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.864% (RMSE 7.73 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.453% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.7% (RMSE 3.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.806% (RMSE 7.52 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.126% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.199% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.569% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.302% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.145% (RMSE 7.63 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.166% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.649% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.365% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.838% (RMSE 8.02 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.551% (RMSE 7.37 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.33% (RMSE 8.29 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.924% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.867% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.09% (RMSE 7.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.051% (RMSE 7.47 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.659% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.026% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.719% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.065% (RMSE 7.03 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.815% (RMSE 7.52 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.866% (RMSE 7.55 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.563% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.818% (RMSE 7.73 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.658% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.511% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.289% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.49% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.572% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.205% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.626% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.251% (RMSE 2.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.676% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.883% (RMSE 7.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.236% (RMSE 3.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.651% (RMSE 7.37 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.868% (RMSE 7.47 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.167% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.015% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.512% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.122% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.594% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.449% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.313% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.278% (RMSE 8.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.122% (RMSE 7.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.633% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.482% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.561% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.856% (RMSE 7.65 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.281% (RMSE 6.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.375% (RMSE 7.38 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.862% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.094% (RMSE 7.07 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.385% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.279% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.54% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.496% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.164% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.344% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.03% (RMSE 8.03 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.581% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.945% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.492% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.822% (RMSE 7.91 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.215% (RMSE 2.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.609% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.046% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.421% (RMSE 3.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.036% (RMSE 7.81 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.435% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.276% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.494% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.246% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.401% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.197% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.835% (RMSE 7.65 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.34% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.645% (RMSE 8.12 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.702% (RMSE 7.09 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.216% (RMSE 8.45 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.426% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.67% (RMSE 8.12 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.962% (RMSE 7.99 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.362% (RMSE 7.34 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.894% (RMSE 7.44 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.445% (RMSE 6.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.485% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.338% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.195% (RMSE 7.59 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.216% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.982% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.339% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.572% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.571% (RMSE 7.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.093% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.487% (RMSE 7.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.496% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.195% (RMSE 2.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.703% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.648% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.011% (RMSE 3.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.082% (RMSE 7.23 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.23% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.272% (RMSE 7.5 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.707% (RMSE 7.44 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.183% (RMSE 7.7 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.276% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.249% (RMSE 7.63 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.713% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.822% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.302% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.62% (RMSE 7.09 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.685% (RMSE 8.12 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.678% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.903% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.737% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.83% (RMSE 7.36 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.003% (RMSE 6.99 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.458% (RMSE 7.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.264% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.957% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.007% (RMSE 7.81 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.207% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.208% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.349% (RMSE 7.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.554% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.555% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.522% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.26% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86% (RMSE 2.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.231% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.123% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.138% (RMSE 4 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.34% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.336% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.526% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.92% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.258% (RMSE 7.54 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.977% (RMSE 7.55 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.978% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.232% (RMSE 7.59 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.034% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.16% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.524% (RMSE 7.01 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.558% (RMSE 8.5 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.451% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.392% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.642% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.063% (RMSE 7.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.55% (RMSE 7.25 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.197% (RMSE 7.27 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.959% (RMSE 7.28 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.704% (RMSE 7.33 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.668% (RMSE 7.37 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.795% (RMSE 7.28 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.574% (RMSE 7.41 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.792% (RMSE 7.2 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.488% (RMSE 7.18 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.145% (RMSE 7.15 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.176% (RMSE 7.23 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.944% (RMSE 2.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.663% (RMSE 7.01 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.243% (RMSE 6.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.867% (RMSE 3.49 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.815% (RMSE 6.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.912% (RMSE 7.32 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.535% (RMSE 7.17 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.461% (RMSE 7.1 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.613% (RMSE 7.05 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.174% (RMSE 6.99 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.292% (RMSE 7.06 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.789% (RMSE 7.24 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.952% (RMSE 7.28 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.179% (RMSE 7.54 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.866% (RMSE 6.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.185% (RMSE 7.59 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.641% (RMSE 7.09 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.874% (RMSE 7.24 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.275% (RMSE 7.34 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.146% (RMSE 7.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.437% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.558% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.833% (RMSE 7.65 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.23% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.064% (RMSE 7.7 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.248% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.37% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.511% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.914% (RMSE 8.02 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.06% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.524% (RMSE 2.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.695% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.214% (RMSE 7.63 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.996% (RMSE 3.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.293% (RMSE 7.26 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.462% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.525% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.162% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.171% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.307% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.414% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.539% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.546% (RMSE 7.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.523% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.859% (RMSE 7.08 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.248% (RMSE 7.59 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.687% (RMSE 7.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.488% (RMSE 7.53 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.722% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.534% (RMSE 6.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.492% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.807% (RMSE 7.65 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.815% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.922% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.213% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.632% (RMSE 8.01 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.608% (RMSE 7.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.346% (RMSE 8.07 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.383% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.545% (RMSE 2.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.132% (RMSE 7.63 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.013% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.105% (RMSE 3.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.421% (RMSE 7.34 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.574% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.164% (RMSE 7.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.536% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.946% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.92% (RMSE 7.55 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.629% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.469% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.558% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.428% (RMSE 7.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.926% (RMSE 7.08 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.031% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.309% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.641% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.512% (RMSE 7.64 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.193% (RMSE 6.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.312% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.711% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.337% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.203% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.318% (RMSE 8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.91% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.722% (RMSE 8.09 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.42% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.525% (RMSE 2.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.072% (RMSE 7.54 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.022% (RMSE 7.58 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.043% (RMSE 3.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.386% (RMSE 7.3 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.284% (RMSE 7.54 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.199% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.425% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.482% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.896% (RMSE 7.51 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.11% (RMSE 7.58 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.411% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.123% (RMSE 7.7 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.336% (RMSE 8.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.938% (RMSE 7.08 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.975% (RMSE 7.55 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.662% (RMSE 7.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.202% (RMSE 7.54 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.534% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.769% (RMSE 6.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.802% (RMSE 7.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.151% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.934% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.956% (RMSE 7.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.242% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.168% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.819% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.669% (RMSE 2.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.707% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.919% (RMSE 7.81 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.389% (RMSE 3.99 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.68% (RMSE 7.44 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.805% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.231% (RMSE 7.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.022% (RMSE 7.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.851% (RMSE 7.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.231% (RMSE 7.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.56% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.56% (RMSE 8.01 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.277% (RMSE 7.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.619% (RMSE 8.05 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.545% (RMSE 7.25 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.259% (RMSE 8.07 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.163% (RMSE 7.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.84% (RMSE 7.73 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.011% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.236% (RMSE 6.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.421% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.585% (RMSE 7.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.182% (RMSE 8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.723% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.614% (RMSE 7.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.841% (RMSE 7.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.024% (RMSE 2.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.469% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.512% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.156% (RMSE 3.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.485% (RMSE 7.33 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.139% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.417% (RMSE 7.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.021% (RMSE 7.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.178% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.34% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.041% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.256% (RMSE 7.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.7% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.222% (RMSE 8.21 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.407% (RMSE 7.1 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.619% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.683% (RMSE 7.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.736% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.939% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.788% (RMSE 6.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.85% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.557% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.576% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.039% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.514% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.935% (RMSE 2.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.693% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.648% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.399% (RMSE 3.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.529% (RMSE 7.41 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.612% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.7% (RMSE 7.91 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.996% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.294% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.169% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.99% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.374% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.776% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.921% (RMSE 8.17 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.925% (RMSE 7.24 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.415% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.217% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.076% (RMSE 7.58 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.337% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.323% (RMSE 7.02 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.129% (RMSE 8.07 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.883% (RMSE 7.99 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.586% (RMSE 8.01 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.923% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.871% (RMSE 2.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.765% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.942% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.576% (RMSE 3.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.658% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.896% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.656% (RMSE 8.05 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.1% (RMSE 8.07 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.491% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.039% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.457% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.742% (RMSE 8.05 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.608% (RMSE 8.01 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.028% (RMSE 8.27 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.53% (RMSE 7.41 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.048% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.719% (RMSE 8.02 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.798% (RMSE 7.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.491% (RMSE 7.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.538% (RMSE 7.05 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.652% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.266% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.38% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.728% (RMSE 2.18 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.304% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.106% (RMSE 7.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.983% (RMSE 3.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.823% (RMSE 7.32 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.631% (RMSE 7.91 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.464% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.946% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.103% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.232% (RMSE 7.63 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.413% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.467% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.564% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.126% (RMSE 8.14 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.802% (RMSE 7.16 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.356% (RMSE 8.15 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.121% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.234% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.775% (RMSE 8.09 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.275% (RMSE 7.34 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.564% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.979% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.189% (RMSE 2.25 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.71% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.45% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.875% (RMSE 3.73 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.176% (RMSE 7.27 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.005% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.257% (RMSE 7.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.457% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.12% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.592% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.736% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.717% (RMSE 7.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.663% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.802% (RMSE 8.23 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.375% (RMSE 7.14 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.997% (RMSE 8.13 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.968% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.742% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.349% (RMSE 8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.16% (RMSE 7.23 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.481% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.638% (RMSE 2.32 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.392% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.081% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.715% (RMSE 3.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.312% (RMSE 7.3 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.785% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.423% (RMSE 7.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.425% (RMSE 7.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.871% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.982% (RMSE 7.62 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.692% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.969% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.131% (RMSE 7.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.954% (RMSE 8.2 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.043% (RMSE 7.19 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.515% (RMSE 8.01 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.818% (RMSE 7.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.317% (RMSE 7.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.789% (RMSE 8.13 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.358% (RMSE 7.26 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.532% (RMSE 2.21 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.412% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.24% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.439% (RMSE 3.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.713% (RMSE 7.48 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.615% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.7% (RMSE 7.91 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.617% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.706% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.096% (RMSE 7.58 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.498% (RMSE 7.64 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.017% (RMSE 7.99 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.114% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.292% (RMSE 8.14 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.717% (RMSE 7.21 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.809% (RMSE 8.06 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.972% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.643% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.181% (RMSE 7.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.636% (RMSE 7.33 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.028% (RMSE 2.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.79% (RMSE 2.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.197% (RMSE 2.25 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.114% (RMSE 2.26 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.829% (RMSE 2.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.477% (RMSE 2.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.696% (RMSE 2.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.354% (RMSE 2.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.14% (RMSE 2.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.953% (RMSE 2.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.041% (RMSE 2.73 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.131% (RMSE 2.72 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.744% (RMSE 2.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.508% (RMSE 2.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.067% (RMSE 3.03 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.04% (RMSE 2.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.717% (RMSE 2.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.527% (RMSE 2.45 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.309% (RMSE 2.7 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.525% (RMSE 6.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.15% (RMSE 4 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.233% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.878% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.421% (RMSE 7.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.574% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.467% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.622% (RMSE 7.65 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.652% (RMSE 7.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.838% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.056% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.398% (RMSE 7.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.366% (RMSE 7.02 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.411% (RMSE 8.29 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.747% (RMSE 7.91 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.342% (RMSE 8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.965% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.253% (RMSE 7.15 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.788% (RMSE 4.03 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.453% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.712% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.696% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.125% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.009% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.343% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.515% (RMSE 7.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.22% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.711% (RMSE 7.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.752% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.896% (RMSE 6.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.58% (RMSE 8.26 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.09% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.534% (RMSE 7.94 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.351% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.732% (RMSE 7.17 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.654% (RMSE 3.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.237% (RMSE 3.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.609% (RMSE 3.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.743% (RMSE 3.89 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.853% (RMSE 3.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.493% (RMSE 3.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.556% (RMSE 3.9 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.316% (RMSE 3.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.108% (RMSE 3.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.348% (RMSE 4.13 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.33% (RMSE 3.28 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.138% (RMSE 3.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.198% (RMSE 4.07 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.075% (RMSE 4.08 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 75.029% (RMSE 3.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.328% (RMSE 3.19 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.698% (RMSE 7.25 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.385% (RMSE 7.06 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.332% (RMSE 7.57 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.101% (RMSE 7.43 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.079% (RMSE 7.27 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.153% (RMSE 7.43 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.003% (RMSE 7.31 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.113% (RMSE 7.54 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.281% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.473% (RMSE 6.93 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.123% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.278% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.591% (RMSE 7.45 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.691% (RMSE 7.33 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.824% (RMSE 6.58 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.985% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.859% (RMSE 7.99 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.823% (RMSE 7.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.709% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.347% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.962% (RMSE 7.92 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.369% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.399% (RMSE 8.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.109% (RMSE 7.19 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.636% (RMSE 7.52 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.364% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.612% (RMSE 7.45 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.829% (RMSE 7.51 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.218% (RMSE 6.82 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.563% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.94% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.29% (RMSE 7.63 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.962% (RMSE 7.77 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.785% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.933% (RMSE 7.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.399% (RMSE 8.18 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.759% (RMSE 7.24 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.117% (RMSE 8.58 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.62% (RMSE 7.87 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.756% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.078% (RMSE 7.96 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.245% (RMSE 7.3 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.586% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.954% (RMSE 7.73 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.369% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.48% (RMSE 7.68 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.631% (RMSE 7.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.93% (RMSE 8.02 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.284% (RMSE 7.26 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.783% (RMSE 8.61 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.183% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.074% (RMSE 8.1 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.541% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.451% (RMSE 7.41 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.607% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.331% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.15% (RMSE 7.78 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.104% (RMSE 7.74 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.998% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.921% (RMSE 7.16 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.357% (RMSE 8.49 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.405% (RMSE 7.71 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.851% (RMSE 7.91 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.039% (RMSE 7.85 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.788% (RMSE 7.32 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.623% (RMSE 7.76 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.61% (RMSE 7.65 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.862% (RMSE 7.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.175% (RMSE 7.7 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.496% (RMSE 6.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.964% (RMSE 8.17 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.3% (RMSE 7.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.51% (RMSE 7.79 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.53% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.479% (RMSE 7.18 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.198% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.737% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.53% (RMSE 7.49 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.224% (RMSE 7.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.411% (RMSE 8.11 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.853% (RMSE 7.51 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.694% (RMSE 7.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.045% (RMSE 7.66 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.958% (RMSE 7.16 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.572% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.792% (RMSE 7.95 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.327% (RMSE 7.18 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -27.991% (RMSE 8.58 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.51% (RMSE 7.83 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.796% (RMSE 8.02 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.836% (RMSE 7.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.435% (RMSE 7.49 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.985% (RMSE 8.1 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.485% (RMSE 7.22 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -27.416% (RMSE 8.56 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.889% (RMSE 7.84 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.292% (RMSE 7.86 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.647% (RMSE 7.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.578% (RMSE 7.33 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.622% (RMSE 7.21 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -72.845% (RMSE 9.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.865% (RMSE 7.88 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.907% (RMSE 8.09 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.615% (RMSE 8.19 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.82% (RMSE 7.52 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.244% (RMSE 7.67 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.383% (RMSE 6.98 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.573% (RMSE 7.01 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.585% (RMSE 7.17 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.338% (RMSE 7.18 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.362% (RMSE 8.53 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.48% (RMSE 7.6 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.229% (RMSE 7.59 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.716% (RMSE 6.75 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.804% (RMSE 7.8 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.453% (RMSE 7.97 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.133% (RMSE 7.43 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.636% (RMSE 7.52 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.29% (RMSE 6.69 vs. 7.58 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.928% (RMSE 6.87 vs. 7.58 null)
R-squared by pairs of metrics is explored:
# Tidy the pairwise-model results matrix (cols: predictor indices + holdout R2)
# into a tibble, attaching the predictor names and a row id for later reference
dfSmallR2WS <- mtxSmallWS %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1 = possWSVars[idx1],
         var2 = possWSVars[idx2],
         rn = row_number())
# Show the strongest 2-predictor combinations first
dfSmallR2WS %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 703 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 shortwave_radiation windspeed_100m 0.917
## 2 diffuse_radiation windspeed_100m 0.915
## 3 windspeed_100m windgusts_10m 0.912
## 4 direct_radiation windspeed_100m 0.912
## 5 windspeed_100m et0_fao_evapotranspiration 0.911
## 6 direct_normal_irradiance windspeed_100m 0.906
## 7 hour windspeed_100m 0.905
## 8 windspeed_100m tod 0.895
## 9 windspeed_100m soil_temperature_0_to_7cm 0.877
## 10 temperature_2m windspeed_100m 0.876
## 11 windspeed_100m soil_moisture_100_to_255cm 0.875
## 12 windspeed_100m vapor_pressure_deficit 0.875
## 13 windspeed_100m src 0.873
## 14 relativehumidity_2m windspeed_100m 0.873
## 15 apparent_temperature windspeed_100m 0.872
## 16 windspeed_100m soil_moisture_7_to_28cm 0.871
## 17 windspeed_100m soil_moisture_0_to_7cm 0.870
## 18 surface_pressure windspeed_100m 0.869
## 19 windspeed_100m soil_moisture_28_to_100cm 0.867
## 20 rain windspeed_100m 0.865
## # ℹ 683 more rows
# Summarize, for each predictor, the min/mean/max holdout R2 across every
# 2-predictor model in which it appears (either slot), then plot the ranges
r2RangeByVar <- dfSmallR2WS %>%
  pivot_longer(cols = c(var1, var2)) %>%
  group_by(value) %>%
  summarise(across(r2, .fns = list("min" = min, "mu" = mean, "max" = max))) %>%
  ungroup()
r2RangeByVar %>%
  ggplot(aes(x = fct_reorder(value, r2_mu))) +
  coord_flip() +
  geom_point(aes(y = r2_mu)) +
  geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
  lims(y = c(NA, 1)) +
  geom_hline(yintercept = 1, lty = 2, color = "red") +
  labs(title = "R-squared in every 2-predictor model including self and one other",
       subtitle = "Predicting ground-level wind speed",
       y = "Range of R2 (min-mean-max)",
       x = NULL
  )
# Best pairs once every wind-related predictor is excluded from both slots
# (filter before arrange: same rows, same final ordering)
dfSmallR2WS %>%
  filter(!str_detect(var1, "wind") & !str_detect(var2, "wind")) %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 561 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 temperature_2m apparent_temperature 0.392
## 2 et0_fao_evapotranspiration src 0.248
## 3 month src 0.223
## 4 surface_pressure et0_fao_evapotranspiration 0.208
## 5 year src 0.207
## 6 weathercode src 0.192
## 7 hour src 0.185
## 8 relativehumidity_2m src 0.183
## 9 tod src 0.179
## 10 snowfall src 0.178
## 11 precipitation src 0.175
## 12 rain src 0.172
## 13 cloudcover_low src 0.168
## 14 et0_fao_evapotranspiration soil_moisture_100_to_255cm 0.165
## 15 surface_pressure soil_moisture_100_to_255cm 0.159
## 16 soil_temperature_28_to_100cm soil_moisture_100_to_255cm 0.155
## 17 apparent_temperature surface_pressure 0.154
## 18 soil_moisture_100_to_255cm doy 0.154
## 19 cloudcover src 0.152
## 20 surface_pressure soil_temperature_28_to_100cm 0.152
## # ℹ 541 more rows
# dfSmallR2WC %>%
# filter(var2!="precipitation",
# var1!="precipitation",
# !str_detect(var2, "cloudcover"),
# !str_detect(var1, "cloudcover")
# ) %>%
# pivot_longer(cols=c(var1, var2)) %>%
# group_by(value) %>%
# summarize(across(r2, .fns=list("min"=min, "mu"=mean, "max"=max))) %>%
# ggplot(aes(x=fct_reorder(value, r2_mu))) +
# coord_flip() +
# geom_point(aes(y=r2_mu)) +
# geom_errorbar(aes(ymin=r2_min, ymax=r2_max)) +
# lims(y=c(NA, 1)) +
# geom_hline(yintercept=1, lty=2, color="red") +
# labs(title="Accuracy in every 2-predictor model including self and one other",
# subtitle="Predicting weathercode (excluding variable paired with 'precipitation' or 'cloudcover')",
# y="Range of accuracy (min-mean-max)",
# x=NULL
# )
#
# dfSmallR2WC %>%
# arrange(desc(r2)) %>%
# filter(!str_detect(var2, "rain|snow|precip"),
# !str_detect(var1, "rain|snow|precip"),
# !str_detect(var2, "cloudcover"),
# !str_detect(var1, "cloudcover")
# ) %>%
# select(var1, var2, r2) %>%
# print(n=20)
# Null accuracy would pick the most frequent observation
# allCity %>% count(weathercode, sort=TRUE) %>% mutate(pct=n/sum(n))
The best predictor is high-level wind speed, and the next best predictor is ground-level wind gusts. Every other pair of predictors has an R-squared on unseen data of at most ~40% (sometimes even negative).
Select combinations are explored using the full training dataset:
# Candidate predictors for the 2-variable wind-speed models on the full data
possLargeWS <- c("windspeed_100m", "windgusts_10m", "temperature_2m", "apparent_temperature")
possLargeWS
## [1] "windspeed_100m" "windgusts_10m" "temperature_2m"
## [4] "apparent_temperature"
# Pre-allocate one row per predictor pair: growing a matrix with rbind()
# inside the loop copies it on every iteration (O(n^2) work overall)
mtxLargeWS <- matrix(NA_real_, nrow=choose(length(possLargeWS), 2), ncol=3)
pairRow <- 0L
for(idx1 in seq_len(length(possLargeWS)-1)) {
  for(idx2 in (idx1+1):length(possLargeWS)) {
    # Holdout R-squared of a 2-predictor random forest on the full training
    # data (the original piped dfTrainCloud[,], an identity subset; dropped).
    # The weathercode factor mutate matches the other runFullRF calls even
    # though weathercode is not among these predictors.
    r2LargeWS <- runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
                           yVar="windspeed_10m",
                           xVars=possLargeWS[c(idx1, idx2)],
                           dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                           useLabel=keyLabel,
                           useSub=stringr::str_to_sentence(keyLabel),
                           isContVar=TRUE,
                           mtry=2,
                           makePlots=FALSE,
                           returnData=TRUE
    )[["rfAcc"]][["r2"]]
    pairRow <- pairRow + 1L
    mtxLargeWS[pairRow, ] <- c(idx1, idx2, r2LargeWS)
  }
}
## Growing trees.. Progress: 42%. Estimated remaining time: 42 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 15 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.903% (RMSE 2.16 vs. 7.58 null)
## Growing trees.. Progress: 33%. Estimated remaining time: 1 minute, 2 seconds.
## Growing trees.. Progress: 66%. Estimated remaining time: 31 seconds.
## Growing trees.. Progress: 95%. Estimated remaining time: 4 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.473% (RMSE 2.68 vs. 7.58 null)
## Growing trees.. Progress: 28%. Estimated remaining time: 1 minute, 18 seconds.
## Growing trees.. Progress: 62%. Estimated remaining time: 38 seconds.
## Growing trees.. Progress: 96%. Estimated remaining time: 3 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.019% (RMSE 2.73 vs. 7.58 null)
## Growing trees.. Progress: 36%. Estimated remaining time: 54 seconds.
## Growing trees.. Progress: 74%. Estimated remaining time: 22 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.508% (RMSE 3.83 vs. 7.58 null)
## Growing trees.. Progress: 33%. Estimated remaining time: 1 minute, 2 seconds.
## Growing trees.. Progress: 70%. Estimated remaining time: 26 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.793% (RMSE 3.81 vs. 7.58 null)
## Growing trees.. Progress: 40%. Estimated remaining time: 46 seconds.
## Growing trees.. Progress: 76%. Estimated remaining time: 19 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.671% (RMSE 5.54 vs. 7.58 null)
# Label the pair-index matrix and rank pairs by holdout R-squared
dfLargeR2WS <- mtxLargeWS %>%
  as.data.frame() %>%
  setNames(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(var1=possLargeWS[idx1], var2=possLargeWS[idx2], rn=row_number())
dfLargeR2WS %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n=20)
## # A tibble: 6 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 windspeed_100m windgusts_10m 0.919
## 2 windspeed_100m temperature_2m 0.875
## 3 windspeed_100m apparent_temperature 0.870
## 4 windgusts_10m apparent_temperature 0.748
## 5 windgusts_10m temperature_2m 0.745
## 6 temperature_2m apparent_temperature 0.467
A model is run to predict evapotranspiration, at first allowing all predictors:
# Label reused in runFullRF printouts/plots for this section
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest predicting evapotranspiration from every training variable
# except the evapotranspiration family itself, plus engineered src/month/tod;
# trained on pre-2022 training rows, evaluated on the 2022 holdout rows
rfET0Full <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="et0_fao_evapotranspiration",
xVars=c(varsTrain[!str_detect(varsTrain, "evapo")], "src", "month", "tod"),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE, # continuous response: R-squared/RMSE reported (see output below)
rndTo=-1L, # presumably a rounding option for runFullRF's reporting — see _v001-_v003
refXY=TRUE, # presumably adds a reference line/limits to the pred-vs-actual plot — confirm in runFullRF
returnData=TRUE
)
## Growing trees.. Progress: 13%. Estimated remaining time: 3 minutes, 31 seconds.
## Growing trees.. Progress: 25%. Estimated remaining time: 3 minutes, 7 seconds.
## Growing trees.. Progress: 38%. Estimated remaining time: 2 minutes, 35 seconds.
## Growing trees.. Progress: 52%. Estimated remaining time: 1 minute, 57 seconds.
## Growing trees.. Progress: 64%. Estimated remaining time: 1 minute, 28 seconds.
## Growing trees.. Progress: 77%. Estimated remaining time: 56 seconds.
## Growing trees.. Progress: 89%. Estimated remaining time: 26 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.815% (RMSE 0.01 vs. 0.2 null)
## `geom_smooth()` using formula = 'y ~ x'
The model is highly effective at predicting evapotranspiration, primarily by using the radiation variables.
The linear model is run for evapotranspiration, using all predictors:
# Eliminate diffuse radiation due to rank-deficiency
# (presumably diffuse_radiation is collinear with the other radiation
# columns, which would leave lm() with an aliased coefficient — TODO confirm)
# Linear benchmark for the random forest above: same pre-2022 training rows,
# weathercode treated as a factor, all other training variables as predictors
lmET0Full <- lm(et0_fao_evapotranspiration ~ .,
data=allCity %>%
filter(tt=="train", year<2022) %>%
mutate(weathercode=factor(weathercode)) %>%
select(all_of(varsTrain)) %>%
select(-diffuse_radiation)
)
summary(lmET0Full)
##
## Call:
## lm(formula = et0_fao_evapotranspiration ~ ., data = allCity %>%
## filter(tt == "train", year < 2022) %>% mutate(weathercode = factor(weathercode)) %>%
## select(all_of(varsTrain)) %>% select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32559 -0.01493 0.00419 0.01915 0.26105
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.562e-01 3.663e-02 -20.643 < 2e-16 ***
## hour -2.169e-04 9.607e-06 -22.576 < 2e-16 ***
## temperature_2m -3.631e-03 9.148e-05 -39.685 < 2e-16 ***
## relativehumidity_2m -5.373e-04 9.132e-06 -58.839 < 2e-16 ***
## dewpoint_2m 1.469e-03 3.041e-05 48.292 < 2e-16 ***
## apparent_temperature 2.917e-03 7.690e-05 37.929 < 2e-16 ***
## pressure_msl 1.042e-04 1.365e-05 7.633 2.30e-14 ***
## surface_pressure 6.140e-05 6.081e-06 10.096 < 2e-16 ***
## precipitation 2.603e-02 1.574e-02 1.654 0.098223 .
## rain -2.746e-02 1.575e-02 -1.743 0.081305 .
## snowfall -4.760e-02 2.281e-02 -2.087 0.036930 *
## cloudcover -1.115e-04 1.000e-05 -11.145 < 2e-16 ***
## cloudcover_low -2.894e-05 5.162e-06 -5.606 2.08e-08 ***
## cloudcover_mid 2.012e-05 3.856e-06 5.218 1.81e-07 ***
## cloudcover_high 2.493e-06 2.440e-06 1.022 0.306957
## shortwave_radiation 4.744e-04 1.189e-06 398.899 < 2e-16 ***
## direct_radiation 2.226e-04 1.476e-06 150.816 < 2e-16 ***
## direct_normal_irradiance -1.413e-04 5.139e-07 -274.899 < 2e-16 ***
## windspeed_10m 1.943e-03 3.661e-05 53.065 < 2e-16 ***
## windspeed_100m -7.108e-05 2.309e-05 -3.078 0.002081 **
## winddirection_10m 4.929e-06 9.329e-07 5.284 1.27e-07 ***
## winddirection_100m 6.691e-06 9.407e-07 7.112 1.14e-12 ***
## windgusts_10m 7.022e-04 1.238e-05 56.717 < 2e-16 ***
## weathercode1 2.305e-03 2.451e-04 9.405 < 2e-16 ***
## weathercode2 5.252e-03 4.405e-04 11.922 < 2e-16 ***
## weathercode3 7.297e-03 5.801e-04 12.578 < 2e-16 ***
## weathercode51 6.326e-03 5.209e-04 12.143 < 2e-16 ***
## weathercode53 3.490e-03 6.900e-04 5.058 4.23e-07 ***
## weathercode55 3.565e-03 9.773e-04 3.648 0.000264 ***
## weathercode61 2.875e-03 9.411e-04 3.055 0.002253 **
## weathercode63 1.954e-03 1.548e-03 1.262 0.206807
## weathercode65 3.918e-03 4.239e-03 0.924 0.355287
## weathercode71 3.671e-03 1.048e-03 3.503 0.000460 ***
## weathercode73 7.998e-03 1.518e-03 5.267 1.39e-07 ***
## weathercode75 9.929e-03 4.232e-03 2.346 0.018984 *
## vapor_pressure_deficit 4.726e-02 1.615e-04 292.563 < 2e-16 ***
## soil_temperature_0_to_7cm 1.573e-03 2.715e-05 57.947 < 2e-16 ***
## soil_temperature_7_to_28cm -2.777e-03 4.571e-05 -60.763 < 2e-16 ***
## soil_temperature_28_to_100cm 6.820e-04 5.074e-05 13.439 < 2e-16 ***
## soil_temperature_100_to_255cm 9.320e-04 2.611e-05 35.693 < 2e-16 ***
## soil_moisture_0_to_7cm -3.997e-02 1.572e-03 -25.424 < 2e-16 ***
## soil_moisture_7_to_28cm 2.651e-02 2.238e-03 11.846 < 2e-16 ***
## soil_moisture_28_to_100cm -5.818e-02 1.683e-03 -34.574 < 2e-16 ***
## soil_moisture_100_to_255cm 4.955e-02 1.655e-03 29.937 < 2e-16 ***
## year 3.002e-04 1.766e-05 16.994 < 2e-16 ***
## doy -1.664e-05 7.047e-07 -23.616 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03447 on 368064 degrees of freedom
## Multiple R-squared: 0.9689, Adjusted R-squared: 0.9688
## F-statistic: 2.544e+05 on 45 and 368064 DF, p-value: < 2.2e-16
# Holdout (2022) performance of the linear model: R-squared and RMSE versus
# the null (mean-only) model. The predict() mutate stays separate from the
# weathercode mutate so `newdata=.` sees the factored data.
allCity %>%
  filter(tt=="test", year==2022) %>%
  mutate(weathercode=factor(weathercode)) %>%
  mutate(pred=predict(lmET0Full, newdata=.)) %>%
  summarize(meModel=mean((pred-et0_fao_evapotranspiration)^2),
            meBase=mean((et0_fao_evapotranspiration-mean(et0_fao_evapotranspiration))^2),
            r2=1-meModel/meBase,
            rmse=sqrt(meModel))
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 0.00144 0.0392 0.963 0.0380
# Coefficient table ranked by |t statistic| (strongest linear effects first);
# as_tibble(rownames=) lifts the coefficient names straight out of the matrix
summary(lmET0Full)$coefficients %>%
  tibble::as_tibble(rownames="Variable") %>%
  arrange(desc(abs(`t value`)))
## # A tibble: 46 × 5
## Variable Estimate `Std. Error` `t value` `Pr(>|t|)`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 shortwave_radiation 0.000474 0.00000119 399. 0
## 2 vapor_pressure_deficit 0.0473 0.000162 293. 0
## 3 direct_normal_irradiance -0.000141 0.000000514 -275. 0
## 4 direct_radiation 0.000223 0.00000148 151. 0
## 5 soil_temperature_7_to_28cm -0.00278 0.0000457 -60.8 0
## 6 relativehumidity_2m -0.000537 0.00000913 -58.8 0
## 7 soil_temperature_0_to_7cm 0.00157 0.0000272 57.9 0
## 8 windgusts_10m 0.000702 0.0000124 56.7 0
## 9 windspeed_10m 0.00194 0.0000366 53.1 0
## 10 dewpoint_2m 0.00147 0.0000304 48.3 0
## # ℹ 36 more rows
Even with many confounders, the linear model largely identifies that radiation is a strong predictor for evapotranspiration. The random forest drives higher R-squared and improved RMSE (RMSE 0.01 RF vs. 0.04 linear).
Correlations between predictors and evapotranspiration are assessed:
# Correlation of each candidate predictor with evapotranspiration.
# vapply() is the type-stable replacement for sapply(): it guarantees exactly
# one numeric per variable and, like sapply(), names the result after the
# character input (USE.NAMES), which rownames_to_column() relies on below.
vapply(varsTrain, FUN=function(x) cor(allCity$et0_fao_evapotranspiration, allCity[[x]]), FUN.VALUE=numeric(1)) %>%
  as.data.frame() %>%
  rownames_to_column("var") %>%
  tibble::as_tibble() %>%
  purrr::set_names(c("var", "cor")) %>%
  ggplot(aes(x=fct_reorder(var, cor), y=cor)) +
  geom_col(fill="lightblue") +
  geom_text(data=~filter(., abs(cor)>0.2), aes(y=cor/2, label=round(cor, 2)), size=2.5) + # label only strong correlations
  coord_flip() +
  labs(title="Correlation with evapotranspiration",
       y="Correlation",
       x=NULL
  ) +
  lims(y=c(NA, 1))
# Discretize evapotranspiration and its four strongest predictors so repeated
# (x, y) combinations can be counted and encoded as point size
allCity %>%
  select(et0_fao_evapotranspiration,
         shortwave_radiation,
         direct_radiation,
         vapor_pressure_deficit,
         soil_temperature_0_to_7cm
  ) %>%
  mutate(across(.cols=c(et0_fao_evapotranspiration), .fns=\(x) round(20*x)/20),            # nearest 0.05
         across(.cols=c(vapor_pressure_deficit), .fns=\(x) round(4*x)/4),                  # nearest 0.25
         across(.cols=c(soil_temperature_0_to_7cm), .fns=\(x) round(x, 0)),                # nearest whole degree
         across(.cols=c(shortwave_radiation, direct_radiation), .fns=\(x) round(x/25)*25), # nearest 25
         rn=row_number()
  ) %>%
  pivot_longer(cols=-c(rn, et0_fao_evapotranspiration)) %>%
  count(et0_fao_evapotranspiration, name, value) %>%
  ggplot(aes(x=value, y=et0_fao_evapotranspiration)) +
  geom_point(aes(size=n), alpha=0.5) +
  geom_smooth(aes(weight=n), method="lm") +
  facet_wrap(~name, scales="free_x") +
  labs(x=NULL, title="Evapotranspiration vs. four potentially strong predictors")
## `geom_smooth()` using formula = 'y ~ x'
A correlation heatmap is produced, borrowing from the recipe provided by STHDA:
# Function copied from STHDA
# Reorder a correlation matrix by hierarchical clustering so highly
# correlated variables sit next to one another in heatmaps.
# cormat: square, symmetric correlation matrix (values in [-1, 1])
# Returns: the same matrix with rows and columns permuted by cluster order
reorder_cormat <- function(cormat){
  # Use correlation between variables as distance: cor 1 -> dist 0, cor -1 -> dist 1
  dd <- as.dist((1-cormat)/2)
  hc <- hclust(dd)
  # Return the permuted matrix directly; the original ended with an
  # assignment, which made the function return its value invisibly
  cormat[hc$order, hc$order]
}
# Create and order correlation matrix and convert to plot-friendly tibble
corAll <- reorder_cormat(cor(allCity[, varsTrain]))
# Blank the upper triangle so each variable pair appears exactly once
corAll[upper.tri(corAll)] <- NA
# Long format: one row per retained (Var1, Var2) cell, NA cells dropped
corAll <- tibble::as_tibble(reshape2::melt(corAll, na.rm=TRUE))
corAll
## # A tibble: 666 × 3
## Var1 Var2 value
## <fct> <fct> <dbl>
## 1 soil_temperature_100_to_255cm soil_temperature_100_to_255cm 1
## 2 doy soil_temperature_100_to_255cm 0.522
## 3 dewpoint_2m soil_temperature_100_to_255cm 0.285
## 4 soil_temperature_7_to_28cm soil_temperature_100_to_255cm 0.640
## 5 soil_temperature_28_to_100cm soil_temperature_100_to_255cm 0.825
## 6 soil_temperature_0_to_7cm soil_temperature_100_to_255cm 0.516
## 7 temperature_2m soil_temperature_100_to_255cm 0.538
## 8 apparent_temperature soil_temperature_100_to_255cm 0.535
## 9 winddirection_10m soil_temperature_100_to_255cm -0.0725
## 10 winddirection_100m soil_temperature_100_to_255cm -0.103
## # ℹ 656 more rows
# Create heatmap: lower-triangle tiles, diverging blue-white-red fill
ggplot(corAll, aes(x=Var2, y=Var1)) +
  geom_tile(aes(fill=value)) +
  scale_fill_gradient2(NULL, low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1,1)) +
  labs(x=NULL, y=NULL, title="Pearson correlation of weather variables") +
  theme(axis.text.x=element_text(angle = 90, vjust = 1, hjust = 1))
The process is converted to functional form:
makeHeatMap <- function(df,
                        vecSelect=NULL,
                        groupSimilar=TRUE,
                        upperTriOnly=TRUE,
                        plotMap=TRUE,
                        returnData=FALSE
) {
  # FUNCTION ARGUMENTS:
  # df: the data frame or tibble to correlate
  # vecSelect: character vector of variables to keep; NULL keeps all columns
  # groupSimilar: boolean, cluster highly correlated variables next to each other?
  # upperTriOnly: boolean, blank the upper triangle so each pair appears once?
  # plotMap: boolean, print the heatmap?
  # returnData: boolean, return the tidy correlation tibble?
  # Pairwise Pearson correlations of the (optionally subset) columns
  corMat <- cor(colSelector(df, vecSelect=vecSelect))
  # Reorder if requested
  if(isTRUE(groupSimilar)) {
    corMat <- reorder_cormat(corMat)
  }
  # Use only the upper triangle if requested
  if(isTRUE(upperTriOnly)) {
    corMat[upper.tri(corMat)] <- NA
  }
  # Long-format tibble with columns Var1, Var2, value (NA cells dropped)
  corTidy <- reshape2::melt(corMat, na.rm=TRUE) %>%
    tibble::as_tibble()
  # Plot map if requested
  if(isTRUE(plotMap)) {
    p1 <- ggplot(corTidy, aes(x=Var2, y=Var1)) +
      geom_tile(aes(fill=value)) +
      scale_fill_gradient2(NULL, low="blue", high="red", mid="white", midpoint=0, limit=c(-1, 1)) +
      labs(x=NULL, y=NULL, title="Pearson correlation of key variables") +
      theme(axis.text.x=element_text(angle = 90, vjust = 1, hjust = 1))
    print(p1)
  }
  # Return data if requested (otherwise NULL, invisibly)
  if(isTRUE(returnData)) return(corTidy)
}
The functional form is tested:
# Default function: clustered ordering, upper triangle blanked, plot only
makeHeatMap(allCity, vecSelect=varsTrain)
# Both triangles kept and data returned (no NA cells dropped, so n^2 rows)
makeHeatMap(allCity, vecSelect=varsTrain, upperTriOnly=FALSE, returnData=TRUE)
## # A tibble: 1,296 × 3
## Var1 Var2 value
## <fct> <fct> <dbl>
## 1 soil_temperature_100_to_255cm soil_temperature_100_to_255cm 1
## 2 doy soil_temperature_100_to_255cm 0.522
## 3 dewpoint_2m soil_temperature_100_to_255cm 0.285
## 4 soil_temperature_7_to_28cm soil_temperature_100_to_255cm 0.640
## 5 soil_temperature_28_to_100cm soil_temperature_100_to_255cm 0.825
## 6 soil_temperature_0_to_7cm soil_temperature_100_to_255cm 0.516
## 7 temperature_2m soil_temperature_100_to_255cm 0.538
## 8 apparent_temperature soil_temperature_100_to_255cm 0.535
## 9 winddirection_10m soil_temperature_100_to_255cm -0.0725
## 10 winddirection_100m soil_temperature_100_to_255cm -0.103
## # ℹ 1,286 more rows
# No grouping of similar variables: columns keep their original order
makeHeatMap(allCity, vecSelect=varsTrain, groupSimilar=FALSE)
# Data only: skip the plot and return the tidy correlation tibble
makeHeatMap(allCity, vecSelect=varsTrain, plotMap=FALSE, returnData=TRUE)
## # A tibble: 666 × 3
## Var1 Var2 value
## <fct> <fct> <dbl>
## 1 soil_temperature_100_to_255cm soil_temperature_100_to_255cm 1
## 2 doy soil_temperature_100_to_255cm 0.522
## 3 dewpoint_2m soil_temperature_100_to_255cm 0.285
## 4 soil_temperature_7_to_28cm soil_temperature_100_to_255cm 0.640
## 5 soil_temperature_28_to_100cm soil_temperature_100_to_255cm 0.825
## 6 soil_temperature_0_to_7cm soil_temperature_100_to_255cm 0.516
## 7 temperature_2m soil_temperature_100_to_255cm 0.538
## 8 apparent_temperature soil_temperature_100_to_255cm 0.535
## 9 winddirection_10m soil_temperature_100_to_255cm -0.0725
## 10 winddirection_100m soil_temperature_100_to_255cm -0.103
## # ℹ 656 more rows
Each variable is run through the random forest standalone, using a smaller training dataset:
# Variables to explore: every training variable except the evapotranspiration
# target family, plus the engineered src/tod/month fields
useET0 <- c(str_subset(varsTrain, "evapo", negate=TRUE), "src", "tod", "month")
useET0
## [1] "hour" "temperature_2m"
## [3] "relativehumidity_2m" "dewpoint_2m"
## [5] "apparent_temperature" "pressure_msl"
## [7] "surface_pressure" "precipitation"
## [9] "rain" "snowfall"
## [11] "cloudcover" "cloudcover_low"
## [13] "cloudcover_mid" "cloudcover_high"
## [15] "shortwave_radiation" "direct_radiation"
## [17] "direct_normal_irradiance" "diffuse_radiation"
## [19] "windspeed_10m" "windspeed_100m"
## [21] "winddirection_10m" "winddirection_100m"
## [23] "windgusts_10m" "weathercode"
## [25] "vapor_pressure_deficit" "soil_temperature_0_to_7cm"
## [27] "soil_temperature_7_to_28cm" "soil_temperature_28_to_100cm"
## [29] "soil_temperature_100_to_255cm" "soil_moisture_0_to_7cm"
## [31] "soil_moisture_7_to_28cm" "soil_moisture_28_to_100cm"
## [33] "soil_moisture_100_to_255cm" "year"
## [35] "doy" "src"
## [37] "tod" "month"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
set.seed(24092614)
# sample(n, k) draws from 1:n, so this reproduces sample(1:nrow(...), k)
# exactly (same RNG stream) without materializing the index vector; the
# original's unused empty-matrix initialization is dropped — the pairwise
# chunk further below re-initializes its own accumulator before use
idxSmallET0 <- sample(nrow(dfTrainCloud), 5000, replace=FALSE)
# Map each variable to file: fit a single-predictor random forest per
# variable and stack the accuracy rows (varNum = position in useET0)
rfET0OneSmall <- map_dfr(.x=useET0,
                         .f=function(x) runFullRF(dfTrain=dfTrainCloud[idxSmallET0,],
                                                  yVar="et0_fao_evapotranspiration",
                                                  xVars=x,
                                                  dfTest=dfTestCloud,
                                                  isContVar=TRUE,
                                                  makePlots=FALSE,
                                                  returnData=TRUE
                         )[["rfAcc"]] %>%
                           t() %>%
                           as_tibble(),
                         .id="varNum"
) %>%
  mutate(varName=useET0[as.numeric(varNum)])
##
## R-squared of test data is: 57.458% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of test data is: 38.141% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of test data is: 32.273% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of test data is: -10.614% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of test data is: 27.013% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of test data is: -1.641% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: -8.885% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of test data is: 1.247% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: 1.01% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: 0.362% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: 4.068% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of test data is: 3.073% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of test data is: 1.834% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: -1.121% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: 84.563% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of test data is: 80.886% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of test data is: 56% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of test data is: 57.067% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of test data is: -4.527% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: -7.115% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: -0.486% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: -2.836% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: 9.798% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of test data is: 4.918% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of test data is: 49.596% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of test data is: 35.237% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of test data is: 3.488% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of test data is: 3.094% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of test data is: -3.258% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: -3.642% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: -8.476% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of test data is: -8.737% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of test data is: -6.036% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: -1.33% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of test data is: 4.445% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of test data is: 4.815% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of test data is: 33.586% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of test data is: 10.331% (RMSE 0.19 vs. 0.2 null)
# Rank the single-variable models by holdout R-squared
arrange(rfET0OneSmall, desc(r2))
## # A tibble: 38 × 5
## varNum mseNull msePred r2 varName
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 15 0.0392 0.00606 0.846 shortwave_radiation
## 2 16 0.0392 0.00750 0.809 direct_radiation
## 3 1 0.0392 0.0167 0.575 hour
## 4 18 0.0392 0.0168 0.571 diffuse_radiation
## 5 17 0.0392 0.0173 0.560 direct_normal_irradiance
## 6 25 0.0392 0.0198 0.496 vapor_pressure_deficit
## 7 2 0.0392 0.0243 0.381 temperature_2m
## 8 26 0.0392 0.0254 0.352 soil_temperature_0_to_7cm
## 9 37 0.0392 0.0261 0.336 tod
## 10 3 0.0392 0.0266 0.323 relativehumidity_2m
## # ℹ 28 more rows
Each combination of two variables is run through the random forest, using a smaller training dataset:
# Variables to explore: every training variable except the evapotranspiration
# target family, plus the engineered src/tod/month fields
useET0 <- c(str_subset(varsTrain, "evapo", negate=TRUE), "src", "tod", "month")
useET0
## [1] "hour" "temperature_2m"
## [3] "relativehumidity_2m" "dewpoint_2m"
## [5] "apparent_temperature" "pressure_msl"
## [7] "surface_pressure" "precipitation"
## [9] "rain" "snowfall"
## [11] "cloudcover" "cloudcover_low"
## [13] "cloudcover_mid" "cloudcover_high"
## [15] "shortwave_radiation" "direct_radiation"
## [17] "direct_normal_irradiance" "diffuse_radiation"
## [19] "windspeed_10m" "windspeed_100m"
## [21] "winddirection_10m" "winddirection_100m"
## [23] "windgusts_10m" "weathercode"
## [25] "vapor_pressure_deficit" "soil_temperature_0_to_7cm"
## [27] "soil_temperature_7_to_28cm" "soil_temperature_28_to_100cm"
## [29] "soil_temperature_100_to_255cm" "soil_moisture_0_to_7cm"
## [31] "soil_moisture_7_to_28cm" "soil_moisture_28_to_100cm"
## [33] "soil_moisture_100_to_255cm" "year"
## [35] "doy" "src"
## [37] "tod" "month"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
set.seed(24092715)
# sample(n, k) draws from 1:n — identical RNG stream to sample(1:nrow(...), k)
idxSmallET0 <- sample(nrow(dfTrainCloud), 5000, replace=FALSE)
# Pre-allocate one row per variable pair: growing a matrix with rbind()
# inside the loop copies it on every iteration (O(n^2) work overall)
mtxSmallET0 <- matrix(NA_real_, nrow=choose(length(useET0), 2), ncol=3)
pairRow <- 0L
# Run each combination of variables
for(idx1 in seq_len(length(useET0)-1)) {
  for(idx2 in (idx1+1):length(useET0)) {
    # Holdout R-squared of the 2-predictor random forest on the small subset
    r2SmallET0 <- runFullRF(dfTrain=dfTrainCloud[idxSmallET0,] %>% mutate(weathercode=factor(weathercode)),
                            yVar="et0_fao_evapotranspiration",
                            xVars=useET0[c(idx1, idx2)],
                            dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                            useLabel=keyLabel,
                            useSub=stringr::str_to_sentence(keyLabel),
                            isContVar=TRUE,
                            mtry=2,
                            makePlots=FALSE,
                            returnData=TRUE
    )[["rfAcc"]][["r2"]]
    pairRow <- pairRow + 1L
    mtxSmallET0[pairRow, ] <- c(idx1, idx2, r2SmallET0)
  }
}
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.756% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.056% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.785% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.574% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.536% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.156% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 60.635% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.874% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.354% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.645% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.692% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.088% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.795% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.915% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.953% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.097% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.661% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.379% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.851% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.443% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.029% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.756% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.826% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.971% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.86% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 77.659% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.751% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.246% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.275% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.177% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.355% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.361% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.366% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.891% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.896% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.479% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 76.92% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.46% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.698% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.707% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.062% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.834% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.947% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.958% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.068% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.713% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.498% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.417% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.572% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.545% (RMSE 0.05 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.533% (RMSE 0.05 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.718% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.987% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.095% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.852% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.928% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.416% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.39% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.52% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.67% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.546% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.738% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.881% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.364% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.255% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.421% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.094% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.413% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.777% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.623% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.196% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.077% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.825% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.984% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.552% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.737% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.695% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.458% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.036% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.215% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.905% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.548% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.225% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.804% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.312% (RMSE 0.06 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.26% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.004% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.049% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.044% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.952% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.11% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.6% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.296% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.134% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.228% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.789% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.902% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.246% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.256% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.673% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.885% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.832% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.952% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.044% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.868% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.488% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.8% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.889% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.849% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.101% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.038% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.617% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.067% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.687% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.767% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.121% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.248% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.837% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.552% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.402% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.272% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.685% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.589% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.142% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.501% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.063% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.039% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.481% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.154% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.731% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.682% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.728% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.347% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.003% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.728% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.208% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.177% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.135% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.216% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.722% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.494% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.404% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.234% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.783% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.287% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.279% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.487% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.161% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.257% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.882% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.41% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.88% (RMSE 0.06 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.118% (RMSE 0.06 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.038% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.103% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.421% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.298% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.605% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.52% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.654% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.109% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.604% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.697% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.228% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.672% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.93% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.569% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.68% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.639% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.972% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.825% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.251% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.201% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.727% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.507% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.399% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.82% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.683% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.483% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.822% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.109% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.084% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.484% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.631% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.073% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.363% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.572% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.698% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.939% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.044% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.681% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.187% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.465% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.051% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.908% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.253% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.004% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.692% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.289% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.956% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.476% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.066% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.163% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.453% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.008% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.776% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.266% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.984% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.171% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.041% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.158% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.448% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.498% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.293% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.068% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.808% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.86% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.655% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.92% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.161% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.283% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.488% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.45% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.922% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.224% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.005% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.926% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.59% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.391% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.573% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.127% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.064% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.804% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.965% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.044% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.797% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.023% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.866% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.447% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.469% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.41% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.327% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.73% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.897% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.781% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.207% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.419% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.609% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.948% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.343% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.656% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.625% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.446% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.85% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.58% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.46% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.402% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.395% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.821% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.053% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.725% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.882% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.45% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.146% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.584% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.487% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.459% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.036% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.464% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.414% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.475% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.689% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.048% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.79% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.18% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.471% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.15% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.891% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.737% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.772% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.763% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.792% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.851% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.686% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.392% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.355% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.367% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.956% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.077% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.783% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.46% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.724% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.464% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.554% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.434% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.931% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.919% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.836% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.417% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.09% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.966% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.746% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.844% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.475% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.368% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.73% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.562% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.278% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.027% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.026% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.779% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.314% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.103% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.821% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.496% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.03% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.974% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.405% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.247% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.474% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.243% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.394% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.875% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.346% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.268% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.577% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.078% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.932% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.73% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.894% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.046% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.719% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.53% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.032% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.5% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.997% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.123% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.243% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.52% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.67% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.039% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.506% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.281% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.267% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.136% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.429% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.379% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.455% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.074% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.616% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.042% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.719% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.944% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.751% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.349% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.525% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.706% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.76% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.796% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.99% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.091% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.411% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.268% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.357% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.812% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.463% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.996% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.249% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.55% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.56% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.235% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.332% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.76% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.637% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.177% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.927% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.119% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.48% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.289% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.386% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.373% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.202% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.152% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.761% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.874% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.618% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.409% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.144% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.219% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.152% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.381% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.807% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.99% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.256% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.042% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.521% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.559% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.955% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.342% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.037% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.972% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.676% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.597% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.848% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.765% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.666% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.4% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.566% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.856% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.935% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.407% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.815% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.679% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.97% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.473% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.819% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.36% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.769% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.495% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.787% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.632% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.478% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.884% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.428% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.811% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.973% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.402% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.154% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.302% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.631% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.284% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.847% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.725% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.825% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.234% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.475% (RMSE 0.04 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.61% (RMSE 0.05 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.47% (RMSE 0.05 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.295% (RMSE 0.06 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.342% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.804% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.041% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.626% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.48% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.412% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.247% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.763% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.973% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.136% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.797% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.185% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.559% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.111% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.613% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.594% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.793% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.106% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.357% (RMSE 0.05 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 91.737% (RMSE 0.06 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 90.184% (RMSE 0.06 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.615% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.467% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.064% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.616% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.346% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.826% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.271% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.614% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 83.678% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 80.814% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.26% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.952% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.089% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.41% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.229% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.385% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.989% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.272% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.351% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.83% (RMSE 0.08 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.068% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.486% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.606% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.292% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.533% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.851% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.39% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.385% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 76.041% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.167% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.611% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.62% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.359% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.52% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.682% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.028% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.386% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 69.924% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 87.185% (RMSE 0.07 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 78.399% (RMSE 0.09 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.684% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.335% (RMSE 0.11 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.971% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.893% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.455% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 58.128% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.119% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.575% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.282% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.193% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.964% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.813% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.841% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.761% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.982% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.405% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.357% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.876% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.372% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.991% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.119% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.056% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.44% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.338% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.947% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.856% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.471% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.04% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.719% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.155% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.195% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.629% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.346% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.164% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.336% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.085% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.342% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.136% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.431% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.052% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.633% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.719% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.591% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.309% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -21.865% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.498% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.519% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.008% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.22% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.294% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.825% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.75% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.567% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.817% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.872% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.28% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.419% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.869% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.573% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.559% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.558% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.697% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.295% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.566% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.725% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.576% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.589% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.231% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.676% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.033% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.005% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.962% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.012% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.168% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.687% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.11% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.541% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.882% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.013% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.177% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.733% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.371% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.032% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.449% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.921% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.612% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.041% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.55% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.7% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.993% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.142% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.914% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.095% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.412% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.648% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.914% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.676% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.352% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.407% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.304% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.338% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.931% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.261% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.124% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.798% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.297% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.318% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.349% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.921% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.256% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.012% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.776% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.343% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.132% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.519% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.799% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.635% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.738% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.49% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.041% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.814% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.113% (RMSE 0.13 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 74.174% (RMSE 0.1 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.683% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 61.666% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.062% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.738% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.077% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.854% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.329% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.229% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.583% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.649% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.443% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.765% (RMSE 0.12 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.541% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.882% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.31% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.126% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.931% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.742% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.869% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.179% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.545% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.035% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.311% (RMSE 0.14 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.575% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.081% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.854% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.89% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.062% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.938% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.679% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.01% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.616% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.964% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.124% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.544% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.014% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.167% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.994% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.114% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.002% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.151% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.167% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.083% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.74% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.46% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.755% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.333% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.007% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.319% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.69% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.741% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.961% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.627% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -23.633% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.245% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.689% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.286% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.527% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.082% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.402% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.488% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.446% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.777% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.511% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.856% (RMSE 0.22 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.1% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.46% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.806% (RMSE 0.17 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.933% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.709% (RMSE 0.21 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.47% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.522% (RMSE 0.16 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.566% (RMSE 0.19 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.438% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.076% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.852% (RMSE 0.2 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.045% (RMSE 0.15 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.824% (RMSE 0.18 vs. 0.2 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.537% (RMSE 0.14 vs. 0.2 null)
R-squared by pairs of metrics is explored:
# Pairwise R-squared results: turn the raw results matrix into a labeled
# tibble, mapping the index columns back to the predictor names in useET0.
dfSmallR2ET0 <- mtxSmallET0 %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(
    var1 = useET0[idx1],
    var2 = useET0[idx2],
    rn = row_number()
  )
# Show the 20 strongest two-predictor combinations
dfSmallR2ET0 %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 703 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 shortwave_radiation vapor_pressure_deficit 0.955
## 2 temperature_2m shortwave_radiation 0.945
## 3 shortwave_radiation soil_temperature_0_to_7cm 0.936
## 4 temperature_2m direct_radiation 0.925
## 5 shortwave_radiation soil_temperature_7_to_28cm 0.925
## 6 direct_radiation vapor_pressure_deficit 0.924
## 7 apparent_temperature shortwave_radiation 0.919
## 8 direct_radiation soil_temperature_0_to_7cm 0.917
## 9 shortwave_radiation soil_temperature_28_to_100cm 0.903
## 10 direct_radiation soil_temperature_7_to_28cm 0.902
## 11 apparent_temperature direct_radiation 0.901
## 12 relativehumidity_2m shortwave_radiation 0.893
## 13 surface_pressure shortwave_radiation 0.891
## 14 pressure_msl shortwave_radiation 0.886
## 15 shortwave_radiation soil_temperature_100_to_255cm 0.883
## 16 shortwave_radiation doy 0.882
## 17 shortwave_radiation month 0.881
## 18 hour vapor_pressure_deficit 0.880
## 19 shortwave_radiation soil_moisture_0_to_7cm 0.878
## 20 shortwave_radiation src 0.878
## # ℹ 683 more rows
# For each variable, summarize the min/mean/max R-squared across every pair
# it appears in, then plot as a dot (mean) with an error bar (min-max range).
dfSmallR2ET0 %>%
  pivot_longer(cols = c(var1, var2)) %>%
  group_by(value) %>%
  summarize(
    r2_min = min(r2),
    r2_mu = mean(r2),
    r2_max = max(r2)
  ) %>%
  ggplot(aes(x = fct_reorder(value, r2_mu))) +
  coord_flip() +
  geom_point(aes(y = r2_mu)) +
  geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
  lims(y = c(NA, 1)) +
  geom_hline(yintercept = 1, lty = 2, color = "red") +
  labs(title = "R-squared in every 2-predictor model including self and one other",
    subtitle = "Predicting evapotranspiration",
    y = "Range of R2 (min-mean-max)",
    x = NULL
  )
# Repeat the ranking with every radiation predictor excluded
# (filtering first, then sorting -- the two operations commute).
dfSmallR2ET0 %>%
  filter(!str_detect(var1, "radi"), !str_detect(var2, "radi")) %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 561 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 hour vapor_pressure_deficit 0.880
## 2 hour temperature_2m 0.838
## 3 hour soil_temperature_0_to_7cm 0.829
## 4 hour soil_temperature_7_to_28cm 0.777
## 5 hour apparent_temperature 0.776
## 6 hour month 0.769
## 7 vapor_pressure_deficit tod 0.742
## 8 hour doy 0.729
## 9 hour soil_temperature_28_to_100cm 0.678
## 10 temperature_2m tod 0.671
## 11 hour relativehumidity_2m 0.661
## 12 hour weathercode 0.658
## 13 soil_temperature_0_to_7cm tod 0.658
## 14 hour src 0.649
## 15 windgusts_10m vapor_pressure_deficit 0.644
## 16 hour cloudcover_low 0.627
## 17 hour cloudcover 0.626
## 18 soil_temperature_0_to_7cm soil_temperature_7_to_28cm 0.617
## 19 hour surface_pressure 0.612
## 20 hour cloudcover_mid 0.611
## # ℹ 541 more rows
# Same min/mean/max R-squared summary per variable, but with radiation
# predictors excluded, plotted as a dot-and-range chart.
dfSmallR2ET0 %>%
  filter(!str_detect(var2, "radi"), !str_detect(var1, "radi")) %>%
  pivot_longer(cols = c(var1, var2)) %>%
  group_by(value) %>%
  summarize(
    r2_min = min(r2),
    r2_mu = mean(r2),
    r2_max = max(r2)
  ) %>%
  ggplot(aes(x = fct_reorder(value, r2_mu))) +
  coord_flip() +
  geom_point(aes(y = r2_mu)) +
  geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
  lims(y = c(NA, 1)) +
  geom_hline(yintercept = 1, lty = 2, color = "red") +
  labs(title = "R-squared in every 2-predictor model including self and one other",
    subtitle = "Predicting evapotranspiration (excluding radiation variables)",
    y = "Range of R2 (min-mean-max)",
    x = NULL
  )
# Null accuracy would pick the most frequent observation
# allCity %>% count(weathercode, sort=TRUE) %>% mutate(pct=n/sum(n))
Radiation is generally the best predictor for evapotranspiration, though the combination of hour and vapor pressure deficit also reaches roughly 90% R-squared.
Select combinations are explored using the full training dataset:
# Candidate predictors for the full-data pairwise random-forest comparison
possLargeET0 <- c(
  "shortwave_radiation",
  "direct_radiation",
  "temperature_2m",
  "hour",
  "vapor_pressure_deficit"
)
possLargeET0
## [1] "shortwave_radiation" "direct_radiation" "temperature_2m"
## [4] "hour" "vapor_pressure_deficit"
# Fit a random forest for every unordered pair of candidate predictors and
# record the holdout R-squared per pair.
#
# Improvements over the original loop:
# - The result matrix is preallocated instead of grown with rbind() on every
#   iteration (quadratic-copy anti-pattern).
# - utils::combn() enumerates the pairs in the same (1,2), (1,3), ... order
#   as the original nested 1:(n-1) / (idx1+1):n loops, without the unsafe
#   1:(n-1) sequence that misbehaves when fewer than two predictors exist.
# - The redundant full copy dfTrainCloud[,] is removed.
pairsLargeET0 <- utils::combn(seq_along(possLargeET0), 2)
mtxLargeET0 <- matrix(NA_real_, nrow = ncol(pairsLargeET0), ncol = 3)
for (pairIdx in seq_len(ncol(pairsLargeET0))) {
idx1 <- pairsLargeET0[1, pairIdx]
idx2 <- pairsLargeET0[2, pairIdx]
# Fit the 2-predictor model and keep only the holdout R-squared;
# weathercode is converted to a factor for both train and test sets
r2LargeET0 <- runFullRF(dfTrain=dfTrainCloud %>% mutate(weathercode=factor(weathercode)),
yVar="et0_fao_evapotranspiration",
xVars=possLargeET0[c(idx1, idx2)],
dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE,
mtry=2,
makePlots=FALSE,
returnData=TRUE
)[["rfAcc"]][["r2"]]
mtxLargeET0[pairIdx, ] <- c(idx1, idx2, r2LargeET0)
}
## Growing trees.. Progress: 46%. Estimated remaining time: 36 seconds.
## Growing trees.. Progress: 86%. Estimated remaining time: 10 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 85.016% (RMSE 0.08 vs. 0.2 null)
## Growing trees.. Progress: 30%. Estimated remaining time: 1 minute, 10 seconds.
## Growing trees.. Progress: 62%. Estimated remaining time: 38 seconds.
## Growing trees.. Progress: 96%. Estimated remaining time: 4 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 94.889% (RMSE 0.04 vs. 0.2 null)
## Growing trees.. Progress: 51%. Estimated remaining time: 29 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.281% (RMSE 0.07 vs. 0.2 null)
## Growing trees.. Progress: 40%. Estimated remaining time: 47 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 15 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 95.669% (RMSE 0.04 vs. 0.2 null)
## Growing trees.. Progress: 37%. Estimated remaining time: 52 seconds.
## Growing trees.. Progress: 79%. Estimated remaining time: 16 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.789% (RMSE 0.05 vs. 0.2 null)
## Growing trees.. Progress: 67%. Estimated remaining time: 15 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.443% (RMSE 0.08 vs. 0.2 null)
## Growing trees.. Progress: 31%. Estimated remaining time: 1 minute, 10 seconds.
## Growing trees.. Progress: 62%. Estimated remaining time: 38 seconds.
## Growing trees.. Progress: 93%. Estimated remaining time: 7 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 92.434% (RMSE 0.05 vs. 0.2 null)
## Growing trees.. Progress: 48%. Estimated remaining time: 33 seconds.
## Growing trees.. Progress: 95%. Estimated remaining time: 3 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 86.423% (RMSE 0.07 vs. 0.2 null)
## Growing trees.. Progress: 29%. Estimated remaining time: 1 minute, 15 seconds.
## Growing trees.. Progress: 61%. Estimated remaining time: 39 seconds.
## Growing trees.. Progress: 88%. Estimated remaining time: 12 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.481% (RMSE 0.14 vs. 0.2 null)
## Growing trees.. Progress: 53%. Estimated remaining time: 27 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 89.855% (RMSE 0.06 vs. 0.2 null)
# Convert the full-data pairwise results matrix into a labeled tibble,
# mapping the index columns back to the names in possLargeET0.
dfLargeR2ET0 <- mtxLargeET0 %>%
  as.data.frame() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  tibble::as_tibble() %>%
  mutate(
    var1 = possLargeET0[idx1],
    var2 = possLargeET0[idx2],
    rn = row_number()
  )
# Show all pairs ranked by holdout R-squared
dfLargeR2ET0 %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 10 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 shortwave_radiation vapor_pressure_deficit 0.957
## 2 shortwave_radiation temperature_2m 0.949
## 3 direct_radiation temperature_2m 0.928
## 4 direct_radiation vapor_pressure_deficit 0.924
## 5 hour vapor_pressure_deficit 0.899
## 6 shortwave_radiation hour 0.883
## 7 temperature_2m hour 0.864
## 8 shortwave_radiation direct_radiation 0.850
## 9 direct_radiation hour 0.844
## 10 temperature_2m vapor_pressure_deficit 0.525
A model is run to predict relative humidity, at first allowing all predictors:
# Label embedded in runFullRF's printed accuracy message and plot titles
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest predicting relative humidity, trained on pre-2022 training
# rows and evaluated against the 2022 holdout. All predictors are allowed
# except humidity columns themselves (filtered out below to avoid target
# leakage), plus the src/month/tod helper columns.
rfRHFull <- runFullRF(dfTrain=allCity %>% filter(tt=="train", year<2022),
yVar="relativehumidity_2m",
# Drop every *humidity_2m column so the target cannot leak into xVars
xVars=c(varsTrain[!str_detect(varsTrain, "humidity_2m$")], "src", "month", "tod"),
dfTest=allCity %>% filter(tt=="test", year==2022),
useLabel=keyLabel,
useSub=stringr::str_to_sentence(keyLabel),
isContVar=TRUE, # continuous target: report R-squared / RMSE (see output above)
rndTo=-1L, # rounding level for reporting -- presumably tens; confirm in runFullRF
refXY=TRUE, # NOTE(review): appears to add a reference line to the XY plot -- confirm
returnData=TRUE
)
## Growing trees.. Progress: 14%. Estimated remaining time: 3 minutes, 7 seconds.
## Growing trees.. Progress: 27%. Estimated remaining time: 2 minutes, 51 seconds.
## Growing trees.. Progress: 39%. Estimated remaining time: 2 minutes, 27 seconds.
## Growing trees.. Progress: 52%. Estimated remaining time: 1 minute, 58 seconds.
## Growing trees.. Progress: 65%. Estimated remaining time: 1 minute, 25 seconds.
## Growing trees.. Progress: 77%. Estimated remaining time: 56 seconds.
## Growing trees.. Progress: 91%. Estimated remaining time: 22 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.502% (RMSE 1.86 vs. 26.4 null)
## `geom_smooth()` using formula = 'y ~ x'
The model is highly effective at predicting relative humidity, primarily by using vapor pressure deficit. There is a mathematical formula that maps temperature (T) and dewpoint (D) to either RH or VPD, and it is notable that the initial model places more emphasis on the derived predictor (VPD) than on the two base predictors (T and D).
The linear model is run for relative humidity, using all predictors:
# Linear model for relative humidity using all training predictors
# (relativehumidity_2m ~ . regresses on every remaining column of the
# selected data). diffuse_radiation is dropped below to avoid the
# rank-deficiency noted in the original comment; weathercode is treated
# as a categorical factor rather than a numeric code.
# Eliminate diffuse radiation due to rank-deficiency
lmRHFull <- lm(relativehumidity_2m ~ .,
data=allCity %>%
filter(tt=="train", year<2022) %>%
mutate(weathercode=factor(weathercode)) %>%
select(all_of(varsTrain)) %>%
select(-diffuse_radiation)
)
# Print coefficient table and fit statistics for the training data
summary(lmRHFull)
##
## Call:
## lm(formula = relativehumidity_2m ~ ., data = allCity %>% filter(tt ==
## "train", year < 2022) %>% mutate(weathercode = factor(weathercode)) %>%
## select(all_of(varsTrain)) %>% select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.260 -4.302 -0.479 3.717 53.931
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.030e+02 6.583e+00 -15.639 < 2e-16 ***
## hour -2.167e-01 1.690e-03 -128.207 < 2e-16 ***
## temperature_2m -4.008e+00 1.509e-02 -265.635 < 2e-16 ***
## dewpoint_2m 2.350e+00 3.878e-03 606.146 < 2e-16 ***
## apparent_temperature 1.350e+00 1.366e-02 98.811 < 2e-16 ***
## pressure_msl -1.229e-01 2.445e-03 -50.279 < 2e-16 ***
## surface_pressure -9.963e-03 1.093e-03 -9.119 < 2e-16 ***
## precipitation 2.126e+01 2.828e+00 7.517 5.63e-14 ***
## rain -2.109e+01 2.830e+00 -7.454 9.10e-14 ***
## snowfall -2.824e+01 4.098e+00 -6.891 5.54e-12 ***
## cloudcover 2.290e-02 1.797e-03 12.747 < 2e-16 ***
## cloudcover_low 5.843e-02 9.225e-04 63.342 < 2e-16 ***
## cloudcover_mid -2.213e-02 6.919e-04 -31.984 < 2e-16 ***
## cloudcover_high 5.343e-04 4.384e-04 1.219 0.223
## shortwave_radiation -1.934e-02 2.537e-04 -76.225 < 2e-16 ***
## direct_radiation 2.090e-02 2.710e-04 77.119 < 2e-16 ***
## direct_normal_irradiance -2.559e-03 1.013e-04 -25.262 < 2e-16 ***
## windspeed_10m 1.122e-01 6.600e-03 17.003 < 2e-16 ***
## windspeed_100m 1.793e-01 4.138e-03 43.335 < 2e-16 ***
## winddirection_10m -7.227e-03 1.672e-04 -43.230 < 2e-16 ***
## winddirection_100m -1.037e-03 1.690e-04 -6.133 8.63e-10 ***
## windgusts_10m -1.956e-01 2.211e-03 -88.487 < 2e-16 ***
## et0_fao_evapotranspiration -1.734e+01 2.948e-01 -58.839 < 2e-16 ***
## weathercode1 -2.663e-01 4.403e-02 -6.049 1.46e-09 ***
## weathercode2 -9.977e-02 7.916e-02 -1.260 0.208
## weathercode3 -8.681e-01 1.042e-01 -8.328 < 2e-16 ***
## weathercode51 1.843e+00 9.356e-02 19.696 < 2e-16 ***
## weathercode53 3.157e+00 1.239e-01 25.487 < 2e-16 ***
## weathercode55 3.289e+00 1.755e-01 18.742 < 2e-16 ***
## weathercode61 3.318e+00 1.690e-01 19.631 < 2e-16 ***
## weathercode63 3.052e+00 2.781e-01 10.972 < 2e-16 ***
## weathercode65 9.820e-01 7.615e-01 1.289 0.197
## weathercode71 1.040e+00 1.883e-01 5.526 3.28e-08 ***
## weathercode73 2.569e+00 2.728e-01 9.416 < 2e-16 ***
## weathercode75 3.224e+00 7.604e-01 4.240 2.24e-05 ***
## vapor_pressure_deficit 3.095e+00 3.181e-02 97.298 < 2e-16 ***
## soil_temperature_0_to_7cm 7.778e-01 4.730e-03 164.442 < 2e-16 ***
## soil_temperature_7_to_28cm -6.394e-01 8.186e-03 -78.111 < 2e-16 ***
## soil_temperature_28_to_100cm 2.015e-01 9.113e-03 22.111 < 2e-16 ***
## soil_temperature_100_to_255cm -3.332e-01 4.667e-03 -71.402 < 2e-16 ***
## soil_moisture_0_to_7cm 2.679e+01 2.792e-01 95.952 < 2e-16 ***
## soil_moisture_7_to_28cm -6.394e+00 4.021e-01 -15.904 < 2e-16 ***
## soil_moisture_28_to_100cm -1.117e-01 3.028e-01 -0.369 0.712
## soil_moisture_100_to_255cm -1.571e+01 2.966e-01 -52.959 < 2e-16 ***
## year 1.641e-01 3.163e-03 51.876 < 2e-16 ***
## doy 5.508e-03 1.264e-04 43.582 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.193 on 368064 degrees of freedom
## Multiple R-squared: 0.9435, Adjusted R-squared: 0.9434
## F-statistic: 1.365e+05 on 45 and 368064 DF, p-value: < 2.2e-16
# Out-of-sample accuracy of the full linear model on the 2022 holdout:
# meModel = MSE of the model predictions, meBase = MSE of the null (mean)
# model, r2 = holdout R-squared, rmse = root mean squared error
allCity %>%
  filter(tt == "test", year == 2022) %>%
  mutate(weathercode = factor(weathercode)) %>%
  mutate(pred = predict(lmRHFull, newdata = .)) %>%
  summarize(meModel = mean((pred - relativehumidity_2m)^2),
            meBase = mean((relativehumidity_2m - mean(relativehumidity_2m))^2),
            r2 = 1 - meModel / meBase,
            rmse = sqrt(meModel)
  )
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 45.7 697. 0.934 6.76
# Coefficient table for the full linear model, ordered by |t value| so the
# strongest predictors appear first. as_tibble(rownames = "Variable") lifts
# the coefficient row names into a column in one step.
summary(lmRHFull)$coefficients %>%
  as.data.frame() %>%
  tibble::as_tibble(rownames = "Variable") %>%
  arrange(desc(abs(`t value`)))
## # A tibble: 46 × 5
## Variable Estimate `Std. Error` `t value` `Pr(>|t|)`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 dewpoint_2m 2.35 0.00388 606. 0
## 2 temperature_2m -4.01 0.0151 -266. 0
## 3 soil_temperature_0_to_7cm 0.778 0.00473 164. 0
## 4 hour -0.217 0.00169 -128. 0
## 5 apparent_temperature 1.35 0.0137 98.8 0
## 6 vapor_pressure_deficit 3.10 0.0318 97.3 0
## 7 soil_moisture_0_to_7cm 26.8 0.279 96.0 0
## 8 windgusts_10m -0.196 0.00221 -88.5 0
## 9 soil_temperature_7_to_28cm -0.639 0.00819 -78.1 0
## 10 direct_radiation 0.0209 0.000271 77.1 0
## # ℹ 36 more rows
The linear model prefers dewpoint and temperature to VPD as predictors for relative humidity. Relative to the random forest, the linear model yields a lower R-squared and a higher RMSE (RMSE ~1.9 for the random forest vs. ~6.8 for the linear model).
Correlations between predictors and relative humidity are assessed:
# Correlation of each candidate predictor with relative humidity, shown as a
# horizontal bar chart ordered by correlation; bars with |cor| > 0.2 get a
# numeric label. vapply() replaces sapply() so the result is guaranteed to be
# a named numeric vector regardless of input (sapply's return type varies).
vapply(varsTrain,
       FUN = function(x) cor(allCity$relativehumidity_2m, allCity[[x]]),
       FUN.VALUE = numeric(1)
) %>%
  as.data.frame() %>%
  rownames_to_column("var") %>%
  tibble::as_tibble() %>%
  purrr::set_names(c("var", "cor")) %>%
  ggplot(aes(x = fct_reorder(var, cor), y = cor)) +
  geom_col(fill = "lightblue") +
  geom_text(data = ~filter(., abs(cor) > 0.2), aes(y = cor / 2, label = round(cor, 2)), size = 2.5) +
  coord_flip() +
  labs(title = "Correlation with relative humidity",
       y = "Correlation",
       x = NULL
  ) +
  lims(y = c(NA, 1))
# Relative humidity vs. four candidate predictors. Predictor values are
# binned by rounding (humidity/pressure to integers, ET0 to 0.05, VPD to
# 0.25, soil moisture to 0.01) so repeated combinations can be counted;
# point size encodes frequency and the lm smooth is frequency-weighted.
allCity %>%
  select(relativehumidity_2m,
         et0_fao_evapotranspiration,
         surface_pressure,
         vapor_pressure_deficit,
         soil_moisture_0_to_7cm
  ) %>%
  mutate(across(.cols = c(relativehumidity_2m, surface_pressure), .fns = ~round(.x)),
         across(.cols = c(et0_fao_evapotranspiration), .fns = ~round(20 * .x) / 20),
         across(.cols = c(vapor_pressure_deficit), .fns = ~round(4 * .x) / 4),
         across(.cols = c(soil_moisture_0_to_7cm), .fns = ~round(100 * .x) / 100),
         rn = row_number()
  ) %>%
  pivot_longer(cols = -c(rn, relativehumidity_2m)) %>%
  count(relativehumidity_2m, name, value) %>%
  ggplot(aes(x = value, y = relativehumidity_2m)) +
  geom_point(aes(size = n), alpha = 0.5) +
  geom_smooth(aes(weight = n), method = "lm") +
  facet_wrap(~name, scales = "free_x") +
  labs(x = NULL, title = "Relative humidity vs. four potentially strong predictors")
## `geom_smooth()` using formula = 'y ~ x'
The correlations are further explored by city:
# Same four predictors, but with a separate frequency-weighted lm fit per
# city (colored lines) plus the pooled overall fit (dashed black line), to
# check whether per-city trends agree with the pooled trend.
allCity %>%
  select(src,
         relativehumidity_2m,
         et0_fao_evapotranspiration,
         surface_pressure,
         vapor_pressure_deficit,
         soil_moisture_0_to_7cm
  ) %>%
  mutate(across(.cols=c(relativehumidity_2m, surface_pressure), .fns=function(x) round(x, 0)),
         across(.cols=c(et0_fao_evapotranspiration), .fns=function(x) round(20*x)/20),
         across(.cols=c(vapor_pressure_deficit), .fns=function(x) round(4*x)/4),
         across(.cols=c(soil_moisture_0_to_7cm), .fns=function(x) round(100*x)/100),
         rn=row_number()
  ) %>%
  pivot_longer(cols=-c(rn, src, relativehumidity_2m)) %>%
  count(src, relativehumidity_2m, name, value) %>%
  ggplot(aes(x=value, y=relativehumidity_2m)) +
  geom_smooth(aes(weight=n, color=src), method="lm") +
  geom_smooth(aes(weight=n), method="lm", lty=2, color="black") +
  facet_wrap(~name, scales="free_x") +
  labs(x=NULL,
       title="Relative humidity vs. four potentially strong predictors",
       # Fix: the subtitle previously had an unbalanced parenthesis
       subtitle="Best lm fit (dashed line is overall, colored lines are by city)"
  ) +
  scale_color_discrete(NULL)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Surface pressure appears to exhibit Simpson's paradox, with an overall increasing relationship to relative humidity but a generally decreasing relationship when controlled for city. The other plotted predictors show similar trends by individual city and overall.
A model using surface pressure only is run, then a model that adds city:
# Label reused in the accuracy printout and plot titles for this experiment
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest for relative humidity using surface pressure as the sole
# predictor: trained on pre-2022 data, evaluated on the 2022 holdout
# (named arguments grouped: model spec first, then data, then reporting)
runFullRF(yVar="relativehumidity_2m",
          xVars=c("surface_pressure"),
          mtry=1,
          dfTrain=allCity %>% filter(tt=="train", year<2022),
          dfTest=allCity %>% filter(tt=="test", year==2022),
          useLabel=keyLabel,
          useSub=stringr::str_to_sentence(keyLabel),
          isContVar=TRUE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.659% (RMSE 19.99 vs. 26.4 null)
## `geom_smooth()` using formula = 'y ~ x'
# Same label as the previous run (re-assignment kept so this chunk also
# stands alone)
keyLabel <- "predictions based on pre-2022 training data applied to 2022 holdout dataset"
# Random forest adding city (src) to surface pressure, same train/test split
# (named arguments grouped: model spec first, then data, then reporting)
runFullRF(yVar="relativehumidity_2m",
          xVars=c("surface_pressure", "src"),
          mtry=2,
          dfTrain=allCity %>% filter(tt=="train", year<2022),
          dfTest=allCity %>% filter(tt=="test", year==2022),
          useLabel=keyLabel,
          useSub=stringr::str_to_sentence(keyLabel),
          isContVar=TRUE,
          rndTo=-1L,
          refXY=TRUE,
          returnData=FALSE
)
## Growing trees.. Progress: 77%. Estimated remaining time: 9 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.232% (RMSE 19.36 vs. 26.4 null)
## `geom_smooth()` using formula = 'y ~ x'
Adding city to the surface pressure model only modestly improves predictive power
The linear model is re-run for relative humidity, using only surface pressure and city:
# Linear model for relative humidity using only city (src) and a per-city
# surface-pressure slope (src:surface_pressure). The data pipeline mirrors
# the full-model fit (same filter/mutate/select steps) even though the
# formula references only two of the retained columns.
# Eliminate diffuse radiation due to rank-deficiency
lmRHTwo <- lm(relativehumidity_2m ~ src + src:surface_pressure,
data=allCity %>%
filter(tt=="train", year<2022) %>%
mutate(weathercode=factor(weathercode)) %>%  # kept for consistency; weathercode is not in the formula
select(all_of(varsTrain), src) %>%
select(-diffuse_radiation)
)
summary(lmRHTwo)  # in-sample fit of the two-term model
##
## Call:
## lm(formula = relativehumidity_2m ~ src + src:surface_pressure,
## data = allCity %>% filter(tt == "train", year < 2022) %>%
## mutate(weathercode = factor(weathercode)) %>% select(all_of(varsTrain),
## src) %>% select(-diffuse_radiation))
##
## Residuals:
## Min 1Q Median 3Q Max
## -69.11 -14.30 0.37 14.49 72.00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 564.55509 10.02796 56.298 < 2e-16 ***
## srcHouston 205.50657 16.42320 12.513 < 2e-16 ***
## srcLA 1055.67031 22.23287 47.482 < 2e-16 ***
## srcNYC -100.70531 13.65868 -7.373 1.67e-13 ***
## srcVegas -622.35837 16.40974 -37.926 < 2e-16 ***
## srcChicago:surface_pressure -0.49419 0.01008 -49.046 < 2e-16 ***
## srcHouston:surface_pressure -0.68528 0.01282 -53.455 < 2e-16 ***
## srcLA:surface_pressure -1.60176 0.02033 -78.781 < 2e-16 ***
## srcNYC:surface_pressure -0.38764 0.00916 -42.317 < 2e-16 ***
## srcVegas:surface_pressure 0.09303 0.01389 6.700 2.09e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.28 on 368100 degrees of freedom
## Multiple R-squared: 0.4519, Adjusted R-squared: 0.4519
## F-statistic: 3.372e+04 on 9 and 368100 DF, p-value: < 2.2e-16
# Out-of-sample accuracy of the src + src:surface_pressure model on the
# 2022 holdout: model MSE, null-model MSE, holdout R-squared, and RMSE
allCity %>%
  filter(tt == "test", year == 2022) %>%
  mutate(weathercode = factor(weathercode)) %>%
  mutate(pred = predict(lmRHTwo, newdata = .)) %>%
  summarize(meModel = mean((pred - relativehumidity_2m)^2),
            meBase = mean((relativehumidity_2m - mean(relativehumidity_2m))^2),
            r2 = 1 - meModel / meBase,
            rmse = sqrt(meModel)
  )
## # A tibble: 1 × 4
## meModel meBase r2 rmse
## <dbl> <dbl> <dbl> <dbl>
## 1 375. 697. 0.462 19.4
# NOTE(review): leftover copy of the coefficient-table pipeline from the
# full-model section; it references lmRHFull rather than lmRHTwo, which is
# presumably why it was left commented out here — confirm before re-enabling
# summary(lmRHFull)$coefficients %>%
# as.data.frame() %>%
# rownames_to_column("Variable") %>%
# tibble::as_tibble() %>%
# arrange(desc(abs(`t value`)))
Results are similar to those from the random forest model
Each combination of two variables is run through the random forest, using a smaller training dataset:
# Variables to explore
# Candidate predictors for the pairwise random-forest sweep: every training
# variable except the target, plus city, time-of-day, and month.
# str_subset(..., negate=TRUE) is equivalent to x[!str_detect(x, pattern)].
useRH <- c(str_subset(varsTrain, "relativehumidity_2m", negate=TRUE), "src", "tod", "month")
useRH
## [1] "hour" "temperature_2m"
## [3] "dewpoint_2m" "apparent_temperature"
## [5] "pressure_msl" "surface_pressure"
## [7] "precipitation" "rain"
## [9] "snowfall" "cloudcover"
## [11] "cloudcover_low" "cloudcover_mid"
## [13] "cloudcover_high" "shortwave_radiation"
## [15] "direct_radiation" "direct_normal_irradiance"
## [17] "diffuse_radiation" "windspeed_10m"
## [19] "windspeed_100m" "winddirection_10m"
## [21] "winddirection_100m" "windgusts_10m"
## [23] "et0_fao_evapotranspiration" "weathercode"
## [25] "vapor_pressure_deficit" "soil_temperature_0_to_7cm"
## [27] "soil_temperature_7_to_28cm" "soil_temperature_28_to_100cm"
## [29] "soil_temperature_100_to_255cm" "soil_moisture_0_to_7cm"
## [31] "soil_moisture_7_to_28cm" "soil_moisture_28_to_100cm"
## [33] "soil_moisture_100_to_255cm" "year"
## [35] "doy" "src"
## [37] "tod" "month"
# Subsets to use (dfTrainCloud and dfTestCloud created previously)
set.seed(24100616)
# Draw a 5,000-row training subsample without replacement. seq_len() is the
# safe index generator (1:nrow(df) yields c(1, 0) when the frame is empty);
# for n >= 1 it produces the same vector as 1:n, so the same seed yields the
# identical sample.
idxSmallRH <- sample(seq_len(nrow(dfTrainCloud)), 5000, replace=FALSE)
# Accumulator for (idx1, idx2, holdout R-squared) rows from the pairwise sweep
mtxSmallRH <- matrix(nrow=0, ncol=3)
# Run each combination of variables
# Fit a small random forest on every pair of candidate predictors and record
# the 2022-holdout R-squared. The result matrix is preallocated with one row
# per pair and index-filled, replacing the original rbind()-in-a-loop growth
# pattern (which copies the whole matrix on every iteration, O(n^2)).
# seq_len() replaces 1:(length(useRH)-1), which misbehaves if useRH had < 2
# elements. Fill order matches the original, so the final matrix is identical.
nUseRH <- length(useRH)
mtxSmallRH <- matrix(NA_real_, nrow=choose(nUseRH, 2), ncol=3)
pairRH <- 0L
for(idx1 in seq_len(nUseRH - 1L)) {
  for(idx2 in (idx1+1L):nUseRH) {
    # Holdout R-squared for the random forest on this variable pair
    r2SmallRH <- runFullRF(dfTrain=dfTrainCloud[idxSmallRH,] %>% mutate(weathercode=factor(weathercode)),
                           yVar="relativehumidity_2m",
                           xVars=useRH[c(idx1, idx2)],
                           dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                           useLabel=keyLabel,
                           useSub=stringr::str_to_sentence(keyLabel),
                           isContVar=TRUE,
                           mtry=2,
                           makePlots=FALSE,
                           returnData=TRUE
    )[["rfAcc"]][["r2"]]
    pairRH <- pairRH + 1L
    mtxSmallRH[pairRH, ] <- c(idx1, idx2, r2SmallRH)
  }
}
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.774% (RMSE 25.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.225% (RMSE 22.05 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.263% (RMSE 26.82 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.833% (RMSE 26.64 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.754% (RMSE 19.44 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.259% (RMSE 23.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.847% (RMSE 23.19 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.44% (RMSE 24.56 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.423% (RMSE 22.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.06% (RMSE 20.27 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.215% (RMSE 23.73 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.795% (RMSE 25.21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.501% (RMSE 24.41 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.747% (RMSE 23.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.396% (RMSE 23.55 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.968% (RMSE 25.73 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.59% (RMSE 27.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.894% (RMSE 27.04 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.862% (RMSE 26.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.411% (RMSE 26.97 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.133% (RMSE 27.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.948% (RMSE 18.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.398% (RMSE 21.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.294% (RMSE 15.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.116% (RMSE 25.71 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.767% (RMSE 26.16 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.829% (RMSE 27.16 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.35% (RMSE 27.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.028% (RMSE 19.21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.618% (RMSE 21.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.357% (RMSE 23.26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.96% (RMSE 19.94 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.463% (RMSE 25.53 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.494% (RMSE 27.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.005% (RMSE 17.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.955% (RMSE 24.63 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.659% (RMSE 25.37 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.842% (RMSE 1.05 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.775% (RMSE 14.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.966% (RMSE 25.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.277% (RMSE 18.61 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.413% (RMSE 24.42 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.311% (RMSE 24.44 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.248% (RMSE 25.42 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.241% (RMSE 23.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.734% (RMSE 21.97 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.207% (RMSE 24.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.869% (RMSE 25.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.333% (RMSE 24.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.309% (RMSE 23.86 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.816% (RMSE 23.19 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.541% (RMSE 24.97 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.058% (RMSE 26.26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.076% (RMSE 26.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.165% (RMSE 25.84 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.469% (RMSE 25.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.33% (RMSE 26.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.994% (RMSE 20.62 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.789% (RMSE 23.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.816% (RMSE 1.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.969% (RMSE 25.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.483% (RMSE 25.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.078% (RMSE 26.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.439% (RMSE 26.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.894% (RMSE 20.29 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.195% (RMSE 21.41 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.896% (RMSE 22.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.531% (RMSE 20.86 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.199% (RMSE 27.71 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.141% (RMSE 24.17 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.983% (RMSE 19.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.97% (RMSE 25.6 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.848% (RMSE 24.92 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 97.113% (RMSE 4.48 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.67% (RMSE 22.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.107% (RMSE 18.65 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.219% (RMSE 22.52 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.851% (RMSE 22.73 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.24% (RMSE 23.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.41% (RMSE 21.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.867% (RMSE 20.81 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.654% (RMSE 22.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.392% (RMSE 23.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.215% (RMSE 20.07 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.001% (RMSE 19.75 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.043% (RMSE 19.92 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.284% (RMSE 21.4 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.095% (RMSE 23.6 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.561% (RMSE 23.08 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.77% (RMSE 23.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.746% (RMSE 23.94 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.351% (RMSE 23.85 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.136% (RMSE 14.9 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.145% (RMSE 21.74 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.931% (RMSE 0.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 88.857% (RMSE 8.81 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.615% (RMSE 14.31 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.591% (RMSE 16.78 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.139% (RMSE 20.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.725% (RMSE 17.96 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.252% (RMSE 18.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.602% (RMSE 21.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.459% (RMSE 19.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.434% (RMSE 25.26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.776% (RMSE 19.44 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.025% (RMSE 18.85 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.695% (RMSE 23.06 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.372% (RMSE 20.55 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.454% (RMSE 26.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.17% (RMSE 19.72 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.889% (RMSE 26.01 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.852% (RMSE 26.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.618% (RMSE 27.26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.456% (RMSE 24.56 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.349% (RMSE 23.26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.42% (RMSE 26.71 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.058% (RMSE 27.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.44% (RMSE 25.4 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.127% (RMSE 25.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.256% (RMSE 24.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.888% (RMSE 26.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.356% (RMSE 28.1 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.823% (RMSE 27.91 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.386% (RMSE 27.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.339% (RMSE 27.73 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.584% (RMSE 28.01 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.232% (RMSE 20.91 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.042% (RMSE 25.04 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.057% (RMSE 2.56 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.369% (RMSE 23.41 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.742% (RMSE 26.89 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.22% (RMSE 27.33 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.444% (RMSE 28.36 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.178% (RMSE 20.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.139% (RMSE 22.53 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.099% (RMSE 24.18 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.887% (RMSE 21.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.288% (RMSE 29.55 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.922% (RMSE 27.17 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.195% (RMSE 21.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.379% (RMSE 27.35 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.547% (RMSE 28.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.825% (RMSE 19.07 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.889% (RMSE 25.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.017% (RMSE 25.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.352% (RMSE 26.84 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.685% (RMSE 24.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.879% (RMSE 22.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.637% (RMSE 26.05 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.095% (RMSE 27.7 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.681% (RMSE 24.81 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.928% (RMSE 24.35 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.942% (RMSE 23.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.665% (RMSE 25.64 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.898% (RMSE 27.92 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.813% (RMSE 27.41 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.737% (RMSE 27.9 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.099% (RMSE 27.82 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.327% (RMSE 28.1 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.438% (RMSE 20.71 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.117% (RMSE 24.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.073% (RMSE 14.2 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.214% (RMSE 25.43 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.234% (RMSE 26.1 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.336% (RMSE 26.35 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.611% (RMSE 27.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.41% (RMSE 21.05 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.63% (RMSE 22.92 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.178% (RMSE 24.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.787% (RMSE 22.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.381% (RMSE 29.68 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.489% (RMSE 26.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.671% (RMSE 21.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.371% (RMSE 26.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.895% (RMSE 27.55 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.114% (RMSE 20.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.913% (RMSE 20.8 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.668% (RMSE 21.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 44.011% (RMSE 19.75 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.577% (RMSE 19.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.249% (RMSE 21.08 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.067% (RMSE 22.07 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.888% (RMSE 18.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.532% (RMSE 18.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.858% (RMSE 18.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.971% (RMSE 19.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.256% (RMSE 20.4 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.722% (RMSE 21.33 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.308% (RMSE 21.23 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.304% (RMSE 20.9 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.581% (RMSE 20.18 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.806% (RMSE 15.21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.648% (RMSE 20.16 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.501% (RMSE 11.95 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.999% (RMSE 19.04 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.061% (RMSE 20.94 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.126% (RMSE 21.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.168% (RMSE 21.42 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.188% (RMSE 20.07 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.109% (RMSE 21.91 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.846% (RMSE 22.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.667% (RMSE 21.82 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.909% (RMSE 22.57 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.737% (RMSE 21.16 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.672% (RMSE 21.34 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.616% (RMSE 21.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.93% (RMSE 21.78 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.265% (RMSE 25.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.27% (RMSE 25.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.156% (RMSE 23.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.141% (RMSE 22.06 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.215% (RMSE 24.87 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.847% (RMSE 25.34 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.77% (RMSE 24.08 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.063% (RMSE 23.75 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.706% (RMSE 23.8 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.135% (RMSE 24.03 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.029% (RMSE 26.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.013% (RMSE 26.4 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.853% (RMSE 26.15 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.241% (RMSE 26.23 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.487% (RMSE 25.39 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.212% (RMSE 18.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.663% (RMSE 23.36 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.398% (RMSE 14.84 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.607% (RMSE 24.39 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.05% (RMSE 25.45 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.289% (RMSE 26.23 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.663% (RMSE 25.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.706% (RMSE 20.83 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.343% (RMSE 22.19 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.187% (RMSE 24.17 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.57% (RMSE 20.52 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.987% (RMSE 25.18 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.269% (RMSE 26.43 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.464% (RMSE 19.31 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.179% (RMSE 24.17 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.196% (RMSE 25.29 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.269% (RMSE 25.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.294% (RMSE 23.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.236% (RMSE 22.05 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.127% (RMSE 24.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.528% (RMSE 25.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.731% (RMSE 24.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.095% (RMSE 23.74 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.884% (RMSE 23.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.926% (RMSE 24.06 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.477% (RMSE 26.2 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.534% (RMSE 26.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.519% (RMSE 26.2 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.824% (RMSE 26.29 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.616% (RMSE 25.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.342% (RMSE 18.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.666% (RMSE 23.36 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.609% (RMSE 14.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.669% (RMSE 24.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.942% (RMSE 25.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.125% (RMSE 26.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.747% (RMSE 25.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.621% (RMSE 20.85 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.322% (RMSE 22.19 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.685% (RMSE 24.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.691% (RMSE 20.5 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.316% (RMSE 25.28 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.542% (RMSE 26.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.473% (RMSE 19.31 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.737% (RMSE 24.23 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.704% (RMSE 25.36 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.677% (RMSE 23.21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.13% (RMSE 22.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.5% (RMSE 25.39 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.567% (RMSE 26.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.225% (RMSE 25.15 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.777% (RMSE 24.65 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.831% (RMSE 24.5 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.859% (RMSE 25.34 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.358% (RMSE 27.35 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.13% (RMSE 27.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.138% (RMSE 27.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.225% (RMSE 27.33 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.174% (RMSE 26.81 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.598% (RMSE 19.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.759% (RMSE 23.35 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.811% (RMSE 15.21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.408% (RMSE 25.4 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.125% (RMSE 26.54 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.905% (RMSE 27.29 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.378% (RMSE 27.1 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.089% (RMSE 21.27 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.728% (RMSE 22.9 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.425% (RMSE 24.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.538% (RMSE 21.36 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.859% (RMSE 26.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.994% (RMSE 27.68 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.264% (RMSE 20.06 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.15% (RMSE 25.57 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.382% (RMSE 26.45 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.42% (RMSE 22.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.836% (RMSE 23.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.51% (RMSE 23.53 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.668% (RMSE 22.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.666% (RMSE 21.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.296% (RMSE 22.04 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.714% (RMSE 22.29 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.663% (RMSE 24.95 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.583% (RMSE 25.24 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.406% (RMSE 25.4 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.906% (RMSE 25.33 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.263% (RMSE 24.16 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.183% (RMSE 17.67 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.298% (RMSE 23.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.626% (RMSE 14.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.499% (RMSE 23.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.541% (RMSE 24.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.461% (RMSE 24.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.301% (RMSE 24.86 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.515% (RMSE 20.36 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.843% (RMSE 21.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.548% (RMSE 22.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.449% (RMSE 20.2 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.755% (RMSE 25.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.548% (RMSE 25.65 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.284% (RMSE 18.61 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.688% (RMSE 22.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.122% (RMSE 24.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.744% (RMSE 22.44 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.761% (RMSE 22.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.546% (RMSE 20.86 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.949% (RMSE 20.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.545% (RMSE 21.03 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.05% (RMSE 20.94 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.469% (RMSE 23.39 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.581% (RMSE 23.67 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.399% (RMSE 23.85 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.426% (RMSE 23.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.736% (RMSE 22.44 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.023% (RMSE 16.05 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.002% (RMSE 21.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.556% (RMSE 14.08 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.31% (RMSE 22.04 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.201% (RMSE 23.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.026% (RMSE 23.01 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.148% (RMSE 22.68 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.871% (RMSE 19.42 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.2% (RMSE 20.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.469% (RMSE 22.33 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 46.698% (RMSE 19.27 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.26% (RMSE 22.82 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.614% (RMSE 23.96 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.952% (RMSE 17.72 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.712% (RMSE 21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.564% (RMSE 22.62 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.793% (RMSE 26.16 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.472% (RMSE 24.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.758% (RMSE 23.94 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.51% (RMSE 23.68 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.048% (RMSE 24.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.74% (RMSE 26.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.143% (RMSE 27.2 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.521% (RMSE 27.24 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.941% (RMSE 27.04 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.152% (RMSE 26.11 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.104% (RMSE 18.65 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.995% (RMSE 23.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.009% (RMSE 14.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.189% (RMSE 24.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.089% (RMSE 26.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.128% (RMSE 27.07 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.363% (RMSE 26.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.939% (RMSE 21.29 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.507% (RMSE 22.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.414% (RMSE 24.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.527% (RMSE 21.52 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.018% (RMSE 26.26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.653% (RMSE 27.39 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.629% (RMSE 19.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.931% (RMSE 24.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.548% (RMSE 26.06 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.052% (RMSE 25.03 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.696% (RMSE 24.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.264% (RMSE 24.3 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.687% (RMSE 25.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.481% (RMSE 28.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.547% (RMSE 28 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.918% (RMSE 28.42 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.76% (RMSE 28.28 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.347% (RMSE 27.6 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.82% (RMSE 19.96 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.363% (RMSE 24 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.85% (RMSE 15.43 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.437% (RMSE 25.8 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.136% (RMSE 27.19 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.187% (RMSE 27.83 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.152% (RMSE 27.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.735% (RMSE 21.65 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.177% (RMSE 23.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.441% (RMSE 25.26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.564% (RMSE 22.15 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -9.262% (RMSE 27.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.591% (RMSE 28.87 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.014% (RMSE 20.61 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.083% (RMSE 25.72 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.927% (RMSE 27.42 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.392% (RMSE 23.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.234% (RMSE 23.72 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.673% (RMSE 23.95 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.713% (RMSE 25.63 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.281% (RMSE 25.96 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.781% (RMSE 25.07 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.221% (RMSE 25.15 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.098% (RMSE 25.31 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.314% (RMSE 17.84 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.418% (RMSE 22.8 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.161% (RMSE 15.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.584% (RMSE 24.4 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.785% (RMSE 24.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.79% (RMSE 25.89 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.894% (RMSE 25.2 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.553% (RMSE 18.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.553% (RMSE 20.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.761% (RMSE 22.28 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.172% (RMSE 19.19 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.123% (RMSE 25.71 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.914% (RMSE 25.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.609% (RMSE 18.17 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.579% (RMSE 25.24 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.79% (RMSE 24.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.985% (RMSE 23.91 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.925% (RMSE 23.91 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.024% (RMSE 25.04 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.156% (RMSE 25.43 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.552% (RMSE 24.68 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.769% (RMSE 24.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.754% (RMSE 24.8 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.852% (RMSE 18.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.187% (RMSE 22.68 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.375% (RMSE 15.08 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.808% (RMSE 24.08 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.661% (RMSE 24.39 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.523% (RMSE 25.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.065% (RMSE 24.75 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 50.476% (RMSE 18.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.27% (RMSE 20.4 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.071% (RMSE 22.07 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.589% (RMSE 18.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.122% (RMSE 25.44 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.377% (RMSE 25.27 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 54.066% (RMSE 17.89 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.3% (RMSE 24.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.545% (RMSE 24.54 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.544% (RMSE 23.82 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.182% (RMSE 24.74 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.468% (RMSE 24.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.142% (RMSE 24.17 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.122% (RMSE 24.18 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.216% (RMSE 24.45 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 51.106% (RMSE 18.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.643% (RMSE 22.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.908% (RMSE 14.72 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.675% (RMSE 23.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 18.629% (RMSE 23.81 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.155% (RMSE 24.74 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.384% (RMSE 24.28 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.622% (RMSE 18.17 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.978% (RMSE 19.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.539% (RMSE 21.84 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.83% (RMSE 18.7 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 13.291% (RMSE 24.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.589% (RMSE 24.96 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 55.481% (RMSE 17.61 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.67% (RMSE 24.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.608% (RMSE 24.11 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.789% (RMSE 26.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -3.472% (RMSE 26.85 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.692% (RMSE 26.31 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.216% (RMSE 26.24 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.87% (RMSE 26.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.377% (RMSE 17.23 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.485% (RMSE 22.63 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.303% (RMSE 15.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 9.479% (RMSE 25.11 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.394% (RMSE 25.81 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.801% (RMSE 26.63 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.358% (RMSE 26.35 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 43.99% (RMSE 19.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.644% (RMSE 21.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.747% (RMSE 23.35 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.334% (RMSE 20.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.571% (RMSE 26.73 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -2.533% (RMSE 26.73 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.202% (RMSE 19.18 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.017% (RMSE 25.45 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.249% (RMSE 26.23 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.329% (RMSE 26.96 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.121% (RMSE 28.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.974% (RMSE 28.55 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.975% (RMSE 28.43 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.785% (RMSE 20.31 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.086% (RMSE 24.75 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.65% (RMSE 15.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.471% (RMSE 26.33 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.954% (RMSE 27.55 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.134% (RMSE 28.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.414% (RMSE 28.36 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.499% (RMSE 20.7 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.088% (RMSE 23.15 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.715% (RMSE 25.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.412% (RMSE 21.21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -32.755% (RMSE 30.41 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.902% (RMSE 28.78 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.864% (RMSE 20.3 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.703% (RMSE 27.27 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -25.185% (RMSE 29.53 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.634% (RMSE 28.39 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.555% (RMSE 28.62 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.578% (RMSE 27.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.157% (RMSE 20.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.147% (RMSE 25.44 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 64.846% (RMSE 15.65 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.36% (RMSE 25.95 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.899% (RMSE 27.42 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.875% (RMSE 28.54 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.023% (RMSE 27.94 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.593% (RMSE 21.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.191% (RMSE 23.28 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.129% (RMSE 25.3 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.29% (RMSE 21.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -26.29% (RMSE 29.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.354% (RMSE 28.35 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.955% (RMSE 21.45 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.093% (RMSE 27.7 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -20.859% (RMSE 29.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.375% (RMSE 29.2 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.917% (RMSE 28.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.832% (RMSE 20.81 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.845% (RMSE 25.61 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.665% (RMSE 14.78 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 2.977% (RMSE 26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.571% (RMSE 27.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -13.528% (RMSE 28.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.28% (RMSE 28.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.355% (RMSE 21.06 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.634% (RMSE 23.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.262% (RMSE 25.01 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 28.473% (RMSE 22.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.51% (RMSE 30.16 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -16.581% (RMSE 28.5 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.881% (RMSE 21.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -5.692% (RMSE 27.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.777% (RMSE 29.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -19.428% (RMSE 28.85 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.199% (RMSE 21.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 6.171% (RMSE 25.57 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.182% (RMSE 14.89 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.648% (RMSE 25.91 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.135% (RMSE 27.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.857% (RMSE 28.29 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.222% (RMSE 28.21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.587% (RMSE 20.85 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.988% (RMSE 23.01 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.992% (RMSE 24.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.353% (RMSE 21.87 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -30.094% (RMSE 30.11 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.564% (RMSE 28.74 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.172% (RMSE 21.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -7.052% (RMSE 27.31 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.897% (RMSE 29.26 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.042% (RMSE 20.27 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.982% (RMSE 23.91 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.103% (RMSE 15.37 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.581% (RMSE 26.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -8.383% (RMSE 27.48 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -17.841% (RMSE 28.65 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -15.813% (RMSE 28.41 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.504% (RMSE 20.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.293% (RMSE 22.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.235% (RMSE 24.73 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.51% (RMSE 21.2 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -24.561% (RMSE 29.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.103% (RMSE 28.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.675% (RMSE 19.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.272% (RMSE 26.56 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.614% (RMSE 28.75 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 57.813% (RMSE 17.15 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 67.774% (RMSE 14.98 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.274% (RMSE 20.74 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.615% (RMSE 21.18 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.718% (RMSE 21.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.372% (RMSE 21.55 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 62.753% (RMSE 16.11 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 56.379% (RMSE 17.43 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.611% (RMSE 18.17 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 63.497% (RMSE 15.95 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.913% (RMSE 21.13 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.133% (RMSE 20.76 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.58% (RMSE 14.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.217% (RMSE 18.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 42.631% (RMSE 19.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 68.366% (RMSE 14.85 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 19.747% (RMSE 23.65 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.605% (RMSE 24.68 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.151% (RMSE 25.3 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 11.201% (RMSE 24.87 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.106% (RMSE 20.43 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 32.446% (RMSE 21.7 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.807% (RMSE 23.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.878% (RMSE 20.64 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.222% (RMSE 24.16 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.514% (RMSE 25.93 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 52.786% (RMSE 18.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.204% (RMSE 22.21 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.501% (RMSE 23.39 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 93.046% (RMSE 6.96 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 84.912% (RMSE 10.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 76.886% (RMSE 12.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 66.648% (RMSE 15.24 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.918% (RMSE 13.74 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.086% (RMSE 13.69 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 70.565% (RMSE 14.32 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.905% (RMSE 13.74 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.882% (RMSE 16.72 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 82.557% (RMSE 11.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 76.541% (RMSE 12.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 65.525% (RMSE 15.5 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 81.639% (RMSE 11.31 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.382% (RMSE 23.11 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 8.561% (RMSE 25.24 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 0.814% (RMSE 26.29 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.197% (RMSE 20.58 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.739% (RMSE 21.97 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 22.906% (RMSE 23.18 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.344% (RMSE 21.55 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.317% (RMSE 27.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 14.104% (RMSE 24.46 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 38.72% (RMSE 20.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.435% (RMSE 25.67 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 7.3% (RMSE 25.42 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -1.612% (RMSE 26.61 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.26% (RMSE 28.71 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 35.547% (RMSE 21.19 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.652% (RMSE 22.91 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 17.718% (RMSE 23.94 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.676% (RMSE 22.45 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -22.918% (RMSE 29.27 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.841% (RMSE 26.51 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.586% (RMSE 21.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.521% (RMSE 26.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -4.776% (RMSE 27.02 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -34.868% (RMSE 30.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.916% (RMSE 21.3 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.956% (RMSE 23.47 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 12.705% (RMSE 24.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.594% (RMSE 22.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -29.977% (RMSE 30.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -18.63% (RMSE 28.75 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.658% (RMSE 22.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.838% (RMSE 27.28 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -14.441% (RMSE 28.24 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.286% (RMSE 21.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.055% (RMSE 23.15 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.692% (RMSE 24.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.116% (RMSE 23.59 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -28.749% (RMSE 29.95 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -12.252% (RMSE 27.97 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 31.534% (RMSE 21.84 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -0.907% (RMSE 26.52 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.174% (RMSE 27.83 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.312% (RMSE 21.07 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 30.931% (RMSE 21.94 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 33.516% (RMSE 21.52 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.173% (RMSE 22.22 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.998% (RMSE 20.79 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 36.172% (RMSE 21.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.732% (RMSE 20.49 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 34.876% (RMSE 21.3 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 23.706% (RMSE 23.06 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 29.788% (RMSE 22.12 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.887% (RMSE 23.48 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 21.856% (RMSE 23.33 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.162% (RMSE 22.53 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 27.932% (RMSE 22.41 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 16.729% (RMSE 24.09 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 24.116% (RMSE 22.99 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 10.843% (RMSE 24.92 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 3.98% (RMSE 25.87 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 20.907% (RMSE 23.48 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 15.631% (RMSE 24.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 1.097% (RMSE 26.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 26.284% (RMSE 22.66 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.864% (RMSE 22.73 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 37.42% (RMSE 20.88 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 40.399% (RMSE 20.38 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.397% (RMSE 22.8 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -35.457% (RMSE 30.72 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 39.504% (RMSE 20.53 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 4.226% (RMSE 25.83 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -6.591% (RMSE 27.25 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 25.325% (RMSE 22.81 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -11.665% (RMSE 27.89 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: -10.705% (RMSE 27.77 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 49.111% (RMSE 18.83 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 41.791% (RMSE 20.14 vs. 26.4 null)
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 5.111% (RMSE 25.71 vs. 26.4 null)
R-squared by pairs of metrics is explored:
# Label the pairwise R-squared matrix with the metric names so results are
# readable (idx1/idx2 index into useRH; rn preserves the original pair order)
dfSmallR2RH <- mtxSmallRH %>%
  as.data.frame() %>%
  tibble::as_tibble() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  mutate(var1 = useRH[idx1],
         var2 = useRH[idx2],
         rn = row_number()
  )
# Show the 20 strongest two-predictor pairs
dfSmallR2RH %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 703 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 dewpoint_2m vapor_pressure_deficit 0.999
## 2 temperature_2m dewpoint_2m 0.998
## 3 temperature_2m vapor_pressure_deficit 0.998
## 4 apparent_temperature vapor_pressure_deficit 0.991
## 5 dewpoint_2m apparent_temperature 0.971
## 6 vapor_pressure_deficit soil_temperature_0_to_7cm 0.930
## 7 dewpoint_2m soil_temperature_0_to_7cm 0.889
## 8 vapor_pressure_deficit soil_temperature_7_to_28cm 0.849
## 9 vapor_pressure_deficit doy 0.826
## 10 vapor_pressure_deficit month 0.816
## 11 surface_pressure vapor_pressure_deficit 0.795
## 12 vapor_pressure_deficit soil_temperature_28_to_100cm 0.769
## 13 vapor_pressure_deficit src 0.765
## 14 vapor_pressure_deficit soil_moisture_7_to_28cm 0.731
## 15 vapor_pressure_deficit soil_moisture_0_to_7cm 0.729
## 16 vapor_pressure_deficit soil_moisture_100_to_255cm 0.729
## 17 cloudcover_low vapor_pressure_deficit 0.716
## 18 pressure_msl vapor_pressure_deficit 0.711
## 19 dewpoint_2m soil_temperature_7_to_28cm 0.706
## 20 et0_fao_evapotranspiration src 0.706
## # ℹ 683 more rows
# For each metric, summarize the R2 range across every pair it appears in,
# then plot min-mean-max per metric (metrics ordered by mean R2)
dfSmallR2RH %>%
  pivot_longer(cols = c(var1, var2)) %>%
  group_by(value) %>%
  summarize(r2_min = min(r2),
            r2_mu = mean(r2),
            r2_max = max(r2)
  ) %>%
  ggplot(aes(x = fct_reorder(value, r2_mu))) +
  coord_flip() +
  geom_point(aes(y = r2_mu)) +
  geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
  lims(y = c(NA, 1)) +
  geom_hline(yintercept = 1, lty = 2, color = "red") +
  labs(title="R-squared in every 2-predictor model including self and one other",
       subtitle="Predicting relative humidity",
       y="Range of R2 (min-mean-max)",
       x=NULL
  )
# Re-rank the pairs after dropping the dominant dewpoint / vapor-pressure-
# deficit predictors to see what comes next
dfSmallR2RH %>%
  filter(!str_detect(var1, "vapor|dewpo"),
         !str_detect(var2, "vapor|dewpo")
  ) %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 630 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 et0_fao_evapotranspiration src 0.706
## 2 temperature_2m apparent_temperature 0.678
## 3 surface_pressure et0_fao_evapotranspiration 0.668
## 4 et0_fao_evapotranspiration soil_moisture_100_to_255cm 0.635
## 5 cloudcover_low et0_fao_evapotranspiration 0.630
## 6 et0_fao_evapotranspiration soil_moisture_0_to_7cm 0.628
## 7 et0_fao_evapotranspiration weathercode 0.578
## 8 diffuse_radiation et0_fao_evapotranspiration 0.574
## 9 et0_fao_evapotranspiration soil_moisture_7_to_28cm 0.564
## 10 hour src 0.560
## 11 direct_normal_irradiance src 0.555
## 12 cloudcover et0_fao_evapotranspiration 0.552
## 13 cloudcover_low src 0.550
## 14 shortwave_radiation et0_fao_evapotranspiration 0.543
## 15 direct_radiation src 0.541
## 16 surface_pressure direct_normal_irradiance 0.529
## 17 weathercode src 0.528
## 18 direct_normal_irradiance soil_moisture_0_to_7cm 0.526
## 19 et0_fao_evapotranspiration soil_moisture_28_to_100cm 0.526
## 20 shortwave_radiation src 0.526
## # ℹ 610 more rows
# Repeat the per-metric R2 range plot with dewpoint/VPD pairs excluded
dfSmallR2RH %>%
  filter(!str_detect(var2, "vapor|dewpo"), !str_detect(var1, "vapor|dewpo")) %>%
  pivot_longer(cols = c(var1, var2)) %>%
  group_by(value) %>%
  summarize(r2_min = min(r2),
            r2_mu = mean(r2),
            r2_max = max(r2)
  ) %>%
  ggplot(aes(x = fct_reorder(value, r2_mu))) +
  coord_flip() +
  geom_point(aes(y = r2_mu)) +
  geom_errorbar(aes(ymin = r2_min, ymax = r2_max)) +
  lims(y = c(NA, 1)) +
  geom_hline(yintercept = 1, lty = 2, color = "red") +
  labs(title="R-squared in every 2-predictor model including self and one other",
       subtitle="Predicting relative humidity (excluding dewpoint and VPD)",
       y="Range of R2 (min-mean-max)",
       x=NULL
  )
# Null accuracy would pick the most frequent observation
# allCity %>% count(weathercode, sort=TRUE) %>% mutate(pct=n/sum(n))
Combinations of vapor pressure deficit, dewpoint, and temperature tend to strongly predict relative humidity. Absent those, evapotranspiration and location (city) are generally the next-best predictors.
Select combinations are explored using the full training dataset:
# Candidate predictors for the full-training-set runs: the strongest metrics
# from the pairwise screen (temperature family, VPD, evapotranspiration) plus
# city of origin (src)
possLargeRH <- c("temperature_2m", "dewpoint_2m", "vapor_pressure_deficit",
                 "apparent_temperature", "et0_fao_evapotranspiration", "src")
possLargeRH
## [1] "temperature_2m" "dewpoint_2m"
## [3] "vapor_pressure_deficit" "apparent_temperature"
## [5] "et0_fao_evapotranspiration" "src"
# Fit a full-training-set random forest for every unordered pair of candidate
# predictors and record the 2022-holdout R-squared.
# Result: one row per pair in mtxLargeRH with columns (idx1, idx2, r2).
nPossRH <- length(possLargeRH)
# Preallocate one row per pair rather than growing with rbind() inside the
# loop (rbind-growth copies the whole matrix on every iteration)
mtxLargeRH <- matrix(NA_real_, nrow = choose(nPossRH, 2), ncol = 3)
iPairRH <- 0L
# seq_len() is safe if the candidate list ever shrinks below 2 entries,
# unlike 1:(length(x)-1) which would yield c(1, 0)
for(idx1 in seq_len(nPossRH - 1)) {
  for(idx2 in (idx1+1):nPossRH) {
    # weathercode is a categorical WMO code; convert so the forest treats it
    # as a factor rather than a numeric
    r2LargeRH <- runFullRF(dfTrain=dfTrainCloud[,] %>% mutate(weathercode=factor(weathercode)),
                           yVar="relativehumidity_2m",
                           xVars=possLargeRH[c(idx1, idx2)],
                           dfTest=dfTestCloud %>% mutate(weathercode=factor(weathercode)),
                           useLabel=keyLabel,
                           useSub=stringr::str_to_sentence(keyLabel),
                           isContVar=TRUE,
                           mtry=2,
                           makePlots=FALSE,
                           returnData=TRUE
    )[["rfAcc"]][["r2"]]
    iPairRH <- iPairRH + 1L
    mtxLargeRH[iPairRH, ] <- c(idx1, idx2, r2LargeRH)
  }
}
## Growing trees.. Progress: 37%. Estimated remaining time: 52 seconds.
## Growing trees.. Progress: 70%. Estimated remaining time: 26 seconds.
## Growing trees.. Progress: 98%. Estimated remaining time: 1 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.984% (RMSE 0.33 vs. 26.4 null)
## Growing trees.. Progress: 48%. Estimated remaining time: 33 seconds.
## Growing trees.. Progress: 92%. Estimated remaining time: 5 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.973% (RMSE 0.44 vs. 26.4 null)
## Growing trees.. Progress: 35%. Estimated remaining time: 57 seconds.
## Growing trees.. Progress: 71%. Estimated remaining time: 25 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.766% (RMSE 14.03 vs. 26.4 null)
## Growing trees.. Progress: 45%. Estimated remaining time: 37 seconds.
## Growing trees.. Progress: 90%. Estimated remaining time: 6 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 47.777% (RMSE 19.08 vs. 26.4 null)
## Growing trees.. Progress: 74%. Estimated remaining time: 10 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 53.701% (RMSE 17.96 vs. 26.4 null)
## Growing trees.. Progress: 52%. Estimated remaining time: 28 seconds.
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.985% (RMSE 0.32 vs. 26.4 null)
## Growing trees.. Progress: 40%. Estimated remaining time: 46 seconds.
## Growing trees.. Progress: 80%. Estimated remaining time: 15 seconds.
## Growing trees.. Progress: 100%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 97.335% (RMSE 4.31 vs. 26.4 null)
## Growing trees.. Progress: 57%. Estimated remaining time: 23 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 73.139% (RMSE 13.68 vs. 26.4 null)
## Growing trees.. Progress: 99%. Estimated remaining time: 0 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 59.472% (RMSE 16.8 vs. 26.4 null)
## Growing trees.. Progress: 43%. Estimated remaining time: 40 seconds.
## Growing trees.. Progress: 91%. Estimated remaining time: 6 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 99.14% (RMSE 2.45 vs. 26.4 null)
## Growing trees.. Progress: 45%. Estimated remaining time: 38 seconds.
## Growing trees.. Progress: 91%. Estimated remaining time: 5 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 71.764% (RMSE 14.03 vs. 26.4 null)
## Growing trees.. Progress: 90%. Estimated remaining time: 3 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 79.906% (RMSE 11.83 vs. 26.4 null)
## Growing trees.. Progress: 36%. Estimated remaining time: 55 seconds.
## Growing trees.. Progress: 71%. Estimated remaining time: 25 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 45.867% (RMSE 19.42 vs. 26.4 null)
## Growing trees.. Progress: 69%. Estimated remaining time: 14 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 48.349% (RMSE 18.97 vs. 26.4 null)
## Growing trees.. Progress: 96%. Estimated remaining time: 1 seconds.
##
## R-squared of predictions based on pre-2022 training data applied to 2022 holdout dataset is: 72.428% (RMSE 13.86 vs. 26.4 null)
# Label the full-data pairwise R-squared matrix with metric names, mirroring
# the small-sample version above
dfLargeR2RH <- mtxLargeRH %>%
  as.data.frame() %>%
  tibble::as_tibble() %>%
  purrr::set_names(c("idx1", "idx2", "r2")) %>%
  mutate(var1 = possLargeRH[idx1],
         var2 = possLargeRH[idx2],
         rn = row_number()
  )
# Rank all pairs by holdout R-squared
dfLargeR2RH %>%
  arrange(desc(r2)) %>%
  select(var1, var2, r2) %>%
  print(n = 20)
## # A tibble: 15 × 3
## var1 var2 r2
## <chr> <chr> <dbl>
## 1 dewpoint_2m vapor_pressure_deficit 1.00
## 2 temperature_2m dewpoint_2m 1.00
## 3 temperature_2m vapor_pressure_deficit 1.00
## 4 vapor_pressure_deficit apparent_temperature 0.991
## 5 dewpoint_2m apparent_temperature 0.973
## 6 vapor_pressure_deficit src 0.799
## 7 dewpoint_2m et0_fao_evapotranspiration 0.731
## 8 et0_fao_evapotranspiration src 0.724
## 9 temperature_2m apparent_temperature 0.718
## 10 vapor_pressure_deficit et0_fao_evapotranspiration 0.718
## 11 dewpoint_2m src 0.595
## 12 temperature_2m src 0.537
## 13 apparent_temperature src 0.483
## 14 temperature_2m et0_fao_evapotranspiration 0.478
## 15 apparent_temperature et0_fao_evapotranspiration 0.459
Data for Miami are downloaded and cached to avoid multiple hits to the server:
# Hourly data download for Miami, FL: build the archive-API URL requesting
# every hourly metric for 2010-2023 in US/Eastern time
testURLHourly <- helperOpenMeteoURL(cityName="Miami FL",
                                    startDate="2010-01-01",
                                    endDate="2023-12-31",
                                    tz="US/Eastern",
                                    hourlyIndices=1:nrow(tblMetricsHourly)
)
##
## Hourly metrics created from indices: temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
testURLHourly
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=25.78&longitude=-80.21&start_date=2010-01-01&end_date=2023-12-31&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm&timezone=US%2FEastern"
# Download file (skip if a cached copy already exists, to avoid repeated
# hits to the Open-Meteo server)
if(file.exists("testOM_hourly_mia.json")) {
  cat("\nFile testOM_hourly_mia.json already exists, skipping download\n")
} else {
  fileDownload(fileName="testOM_hourly_mia.json", url=testURLHourly)
}
##
## File testOM_hourly_mia.json already exists, skipping download
# Daily data download for Miami, FL: build the archive-API URL requesting
# every daily metric for 2010-2023 in US/Eastern time
testURLDaily <- helperOpenMeteoURL(cityName="Miami FL",
                                   startDate="2010-01-01",
                                   endDate="2023-12-31",
                                   tz="US/Eastern",
                                   dailyIndices=1:nrow(tblMetricsDaily)
)
##
## Daily metrics created from indices: weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration
testURLDaily
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=25.78&longitude=-80.21&start_date=2010-01-01&end_date=2023-12-31&daily=weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration&timezone=US%2FEastern"
# Download file (skip if a cached copy already exists, to avoid repeated
# hits to the Open-Meteo server)
if(file.exists("testOM_daily_mia.json")) {
  cat("\nFile testOM_daily_mia.json already exists, skipping download\n")
} else {
  fileDownload(fileName="testOM_daily_mia.json", url=testURLDaily)
}
##
## File testOM_daily_mia.json already exists, skipping download
The daily and hourly datasets are loaded:
# Read daily JSON file
# Parses the cached Open-Meteo response into a named list of tibbles
# (tblDaily, tblHourly, tblUnits, tblDescription) and prints a summary,
# as shown in the echoed output below; tblHourly is NULL for a daily file
miaOMDaily <- formatOpenMeteoJSON("testOM_daily_mia.json")
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily
##
## $tblDaily
## # A tibble: 5,113 × 18
## date time weathercode temperature_2m_max temperature_2m_min
## <date> <chr> <int> <dbl> <dbl>
## 1 2010-01-01 2010-01-01 53 26.6 17.5
## 2 2010-01-02 2010-01-02 1 18 11.6
## 3 2010-01-03 2010-01-03 51 16.7 11.3
## 4 2010-01-04 2010-01-04 3 15.5 9
## 5 2010-01-05 2010-01-05 3 14.9 9.4
## 6 2010-01-06 2010-01-06 1 13.8 6.3
## 7 2010-01-07 2010-01-07 1 16.6 8.6
## 8 2010-01-08 2010-01-08 2 22.3 11.6
## 9 2010-01-09 2010-01-09 61 18.3 6.8
## 10 2010-01-10 2010-01-10 3 9.3 3.3
## # ℹ 5,103 more rows
## # ℹ 13 more variables: apparent_temperature_max <dbl>,
## # apparent_temperature_min <dbl>, precipitation_sum <dbl>, rain_sum <dbl>,
## # snowfall_sum <dbl>, precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## # windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## # winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## # et0_fao_evapotranspiration <dbl>
##
## $tblHourly
## NULL
##
## $tblUnits
## # A tibble: 17 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 daily_units time "iso8601" <NA>
## 2 daily_units weathercode "wmo code" The most severe weather co…
## 3 daily_units temperature_2m_max "deg C" Maximum and minimum daily …
## 4 daily_units temperature_2m_min "deg C" Maximum and minimum daily …
## 5 daily_units apparent_temperature_max "deg C" Maximum and minimum daily …
## 6 daily_units apparent_temperature_min "deg C" Maximum and minimum daily …
## 7 daily_units precipitation_sum "mm" Sum of daily precipitation…
## 8 daily_units rain_sum "mm" Sum of daily rain
## 9 daily_units snowfall_sum "cm" Sum of daily snowfall
## 10 daily_units precipitation_hours "h" The number of hours with r…
## 11 daily_units sunrise "iso8601" Sun rise and set times
## 12 daily_units sunset "iso8601" Sun rise and set times
## 13 daily_units windspeed_10m_max "km/h" Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max "km/h" Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg " Dominant wind direction
## 16 daily_units shortwave_radiation_sum "MJ/m²" The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm" Daily sum of ET0 Reference…
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 25.8 -80.2 122. -14400 US/Eastern
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 25.7645
## longitude: -80.19607
## generationtime_ms: 121.9139
## utc_offset_seconds: -14400
## timezone: US/Eastern
## timezone_abbreviation: EDT
## elevation: 4
# Read hourly JSON file
# addVars=TRUE augments the hourly table with derived columns (year, month,
# tod, season, doy, pct_* percentile ranks, etc.), per the glimpse output
# and count tables echoed below; tblDaily is NULL for an hourly file
miaTemp <- formatOpenMeteoJSON("testOM_hourly_mia.json", addVars=TRUE)
##
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly
##
## $tblDaily
## NULL
##
## $tblHourly
## # A tibble: 122,712 × 37
## time date hour temperature_2m relativehumidity_2m
## <dttm> <date> <int> <dbl> <int>
## 1 2010-01-01 00:00:00 2010-01-01 0 23 74
## 2 2010-01-01 01:00:00 2010-01-01 1 22.9 73
## 3 2010-01-01 02:00:00 2010-01-01 2 22.8 72
## 4 2010-01-01 03:00:00 2010-01-01 3 21.9 84
## 5 2010-01-01 04:00:00 2010-01-01 4 21.7 83
## 6 2010-01-01 05:00:00 2010-01-01 5 21.7 82
## 7 2010-01-01 06:00:00 2010-01-01 6 21.6 82
## 8 2010-01-01 07:00:00 2010-01-01 7 21.7 82
## 9 2010-01-01 08:00:00 2010-01-01 8 21.7 82
## 10 2010-01-01 09:00:00 2010-01-01 9 22 81
## # ℹ 122,702 more rows
## # ℹ 32 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
##
## $tblUnits
## # A tibble: 34 × 4
## metricType name value description
## <chr> <chr> <chr> <chr>
## 1 hourly_units time iso8601 <NA>
## 2 hourly_units temperature_2m deg C Air temperature at 2 meters above …
## 3 hourly_units relativehumidity_2m % Relative humidity at 2 meters abov…
## 4 hourly_units dewpoint_2m deg C Dew point temperature at 2 meters …
## 5 hourly_units apparent_temperature deg C Apparent temperature is the percei…
## 6 hourly_units pressure_msl hPa Atmospheric air pressure reduced t…
## 7 hourly_units surface_pressure hPa Atmospheric air pressure reduced t…
## 8 hourly_units precipitation mm Total precipitation (rain, showers…
## 9 hourly_units rain mm Only liquid precipitation of the p…
## 10 hourly_units snowfall cm Snowfall amount of the preceding h…
## # ℹ 24 more rows
##
## $tblDescription
## # A tibble: 1 × 7
## latitude longitude generationtime_ms utc_offset_seconds timezone
## <dbl> <dbl> <dbl> <int> <chr>
## 1 25.8 -80.2 180. -14400 US/Eastern
## # ℹ 2 more variables: timezone_abbreviation <chr>, elevation <dbl>
##
##
## latitude: 25.7645
## longitude: -80.19607
## generationtime_ms: 179.885
## utc_offset_seconds: -14400
## timezone: US/Eastern
## timezone_abbreviation: EDT
## elevation: 4
##
## Rows: 122,712
## Columns: 80
## $ time <dttm> 2010-01-01 00:00:00, 2010-01-01 01:…
## $ date <date> 2010-01-01, 2010-01-01, 2010-01-01,…
## $ hour <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ temperature_2m <dbl> 23.0, 22.9, 22.8, 21.9, 21.7, 21.7, …
## $ relativehumidity_2m <int> 74, 73, 72, 84, 83, 82, 82, 82, 82, …
## $ dewpoint_2m <dbl> 18.1, 17.8, 17.5, 19.1, 18.8, 18.5, …
## $ apparent_temperature <dbl> 24.9, 24.8, 24.6, 24.2, 23.5, 23.2, …
## $ pressure_msl <dbl> 1019.7, 1019.5, 1018.8, 1018.3, 1017…
## $ surface_pressure <dbl> 1019.2, 1019.0, 1018.3, 1017.8, 1017…
## $ precipitation <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ rain <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cloudcover <int> 43, 50, 42, 40, 40, 45, 33, 51, 43, …
## $ cloudcover_low <int> 12, 14, 9, 3, 2, 5, 9, 29, 25, 12, 1…
## $ cloudcover_mid <int> 6, 14, 10, 16, 15, 17, 21, 25, 3, 11…
## $ cloudcover_high <int> 96, 96, 92, 93, 98, 100, 42, 33, 63,…
## $ shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 150, …
## $ direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 61, 12…
## $ direct_normal_irradiance <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 89, 1…
## $ windspeed_10m <dbl> 7.1, 6.4, 5.7, 7.2, 9.8, 11.5, 10.7,…
## $ windspeed_100m <dbl> 9.6, 8.5, 8.7, 10.8, 13.0, 14.4, 15.…
## $ winddirection_10m <int> 210, 232, 198, 180, 172, 182, 213, 2…
## $ winddirection_100m <int> 193, 208, 187, 176, 174, 180, 207, 2…
## $ windgusts_10m <dbl> 18.4, 18.7, 18.4, 11.2, 15.1, 17.6, …
## $ et0_fao_evapotranspiration <dbl> 0.02, 0.02, 0.01, 0.00, 0.01, 0.02, …
## $ weathercode <int> 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, …
## $ vapor_pressure_deficit <dbl> 0.72, 0.76, 0.77, 0.41, 0.43, 0.47, …
## $ soil_temperature_0_to_7cm <dbl> 22.3, 22.1, 21.9, 21.5, 20.9, 20.8, …
## $ soil_temperature_7_to_28cm <dbl> 23.2, 23.2, 23.1, 23.1, 23.0, 22.9, …
## $ soil_temperature_28_to_100cm <dbl> 23.2, 23.2, 23.2, 23.2, 23.2, 23.2, …
## $ soil_temperature_100_to_255cm <dbl> 26.3, 26.3, 26.3, 26.3, 26.3, 26.3, …
## $ soil_moisture_0_to_7cm <dbl> 0.052, 0.052, 0.052, 0.053, 0.053, 0…
## $ soil_moisture_7_to_28cm <dbl> 0.135, 0.135, 0.135, 0.135, 0.135, 0…
## $ soil_moisture_28_to_100cm <dbl> 0.151, 0.151, 0.151, 0.151, 0.151, 0…
## $ soil_moisture_100_to_255cm <dbl> 0.142, 0.142, 0.142, 0.142, 0.142, 0…
## $ origTime <chr> "2010-01-01T00:00", "2010-01-01T01:0…
## $ year <dbl> 2010, 2010, 2010, 2010, 2010, 2010, …
## $ month <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
## $ fct_hour <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ tod <fct> Night, Night, Night, Night, Night, N…
## $ doy <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ season <fct> Winter, Winter, Winter, Winter, Wint…
## $ todSeason <fct> Winter-Night, Winter-Night, Winter-N…
## $ pct_hour <dbl> 0, 4, 8, 13, 17, 21, 25, 29, 33, 38,…
## $ pct_temperature_2m <dbl> 25, 24, 23, 17, 16, 16, 15, 16, 16, …
## $ pct_relativehumidity_2m <dbl> 41, 38, 36, 73, 69, 66, 66, 66, 66, …
## $ pct_dewpoint_2m <dbl> 27, 25, 24, 33, 31, 29, 29, 29, 29, …
## $ pct_apparent_temperature <dbl> 31, 30, 29, 27, 23, 22, 22, 22, 22, …
## $ pct_pressure_msl <dbl> 79, 77, 70, 65, 58, 52, 40, 43, 39, …
## $ pct_surface_pressure <dbl> 79, 77, 70, 65, 58, 52, 40, 43, 39, …
## $ pct_precipitation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_rain <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_snowfall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_cloudcover <dbl> 64, 72, 63, 60, 60, 67, 50, 73, 64, …
## $ pct_cloudcover_low <dbl> 53, 57, 44, 26, 22, 32, 44, 79, 75, …
## $ pct_cloudcover_mid <dbl> 43, 60, 52, 63, 61, 65, 70, 75, 35, …
## $ pct_cloudcover_high <dbl> 83, 83, 79, 80, 86, 91, 60, 57, 66, …
## $ pct_shortwave_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 61, 6…
## $ pct_direct_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 61, 6…
## $ pct_direct_normal_irradiance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 63, 6…
## $ pct_diffuse_radiation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 65, 8…
## $ pct_windspeed_10m <dbl> 18, 15, 12, 19, 33, 43, 38, 35, 37, …
## $ pct_windspeed_100m <dbl> 17, 13, 14, 21, 30, 36, 41, 41, 41, …
## $ pct_winddirection_10m <dbl> 76, 79, 73, 68, 66, 69, 76, 78, 79, …
## $ pct_winddirection_100m <dbl> 74, 77, 72, 69, 68, 70, 77, 79, 79, …
## $ pct_windgusts_10m <dbl> 26, 27, 26, 7, 16, 24, 28, 30, 28, 3…
## $ pct_et0_fao_evapotranspiration <dbl> 19, 19, 12, 0, 12, 19, 19, 12, 19, 3…
## $ pct_weathercode <dbl> 30, 30, 30, 30, 30, 30, 30, 62, 30, …
## $ pct_vapor_pressure_deficit <dbl> 48, 52, 53, 18, 20, 24, 23, 23, 24, …
## $ pct_soil_temperature_0_to_7cm <dbl> 18, 17, 16, 14, 11, 11, 10, 10, 11, …
## $ pct_soil_temperature_7_to_28cm <dbl> 17, 17, 16, 16, 15, 15, 14, 14, 14, …
## $ pct_soil_temperature_28_to_100cm <dbl> 12, 12, 12, 12, 12, 12, 12, 12, 12, …
## $ pct_soil_temperature_100_to_255cm <dbl> 52, 52, 52, 52, 52, 52, 52, 52, 52, …
## $ pct_soil_moisture_0_to_7cm <dbl> 18, 18, 18, 19, 19, 20, 20, 20, 20, …
## $ pct_soil_moisture_7_to_28cm <dbl> 43, 43, 43, 43, 43, 43, 43, 43, 43, …
## $ pct_soil_moisture_28_to_100cm <dbl> 51, 51, 51, 51, 51, 51, 51, 51, 51, …
## $ pct_soil_moisture_100_to_255cm <dbl> 38, 38, 38, 38, 38, 38, 38, 38, 38, …
## $ pct_year <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ pct_doy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## # A tibble: 8 × 4
## todSeason season tod n
## <fct> <fct> <fct> <int>
## 1 Spring-Day Spring Day 15456
## 2 Spring-Night Spring Night 15456
## 3 Summer-Day Summer Day 15456
## 4 Summer-Night Summer Night 15456
## 5 Fall-Day Fall Day 15288
## 6 Fall-Night Fall Night 15288
## 7 Winter-Day Winter Day 15156
## 8 Winter-Night Winter Night 15156
## # A tibble: 24 × 4
## hour fct_hour tod n
## <int> <fct> <fct> <int>
## 1 0 0 Night 5113
## 2 1 1 Night 5113
## 3 2 2 Night 5113
## 4 3 3 Night 5113
## 5 4 4 Night 5113
## 6 5 5 Night 5113
## 7 6 6 Night 5113
## 8 7 7 Day 5113
## 9 8 8 Day 5113
## 10 9 9 Day 5113
## 11 10 10 Day 5113
## 12 11 11 Day 5113
## 13 12 12 Day 5113
## 14 13 13 Day 5113
## 15 14 14 Day 5113
## 16 15 15 Day 5113
## 17 16 16 Day 5113
## 18 17 17 Day 5113
## 19 18 18 Day 5113
## 20 19 19 Night 5113
## 21 20 20 Night 5113
## 22 21 21 Night 5113
## 23 22 22 Night 5113
## 24 23 23 Night 5113
## # A tibble: 12 × 3
## month season n
## <fct> <fct> <int>
## 1 Jan Winter 10416
## 2 Feb Winter 9480
## 3 Mar Spring 10416
## 4 Apr Spring 10080
## 5 May Spring 10416
## 6 Jun Summer 10080
## 7 Jul Summer 10416
## 8 Aug Summer 10416
## 9 Sep Fall 10080
## 10 Oct Fall 10416
## 11 Nov Fall 10080
## 12 Dec Winter 10416
An integrated set of all-city test and train data is updated:
# Bind all the data frames
# Each per-city tibble is tagged with its city name via the list names,
# which bind_rows() carries into the `src` column.
allCity <- list("NYC"=nycTemp,
 "LA"=laxTemp,
 "Chicago"=chiTemp,
 "Houston"=houTemp,
 "Vegas"=lasTemp,
 "Miami"=miaTemp
 ) %>%
 bind_rows(.id="src")
# Create the index for training data (70/30 train/test split)
set.seed(24101115)
# seq_len() is the safe form of 1:nrow() (1:n misbehaves when n == 0);
# it yields the identical vector here, so the seeded draw is unchanged
idxTrain_v2 <- sample(seq_len(nrow(allCity)), size = round(0.7*nrow(allCity)), replace=FALSE)
# Add test-train flag to full dataset; fct_src is the factor response
# used by the classification models below
allCity <- allCity %>%
 mutate(tt=ifelse(row_number() %in% idxTrain_v2, "train", "test"),
 fct_src=factor(src))
allCity
## # A tibble: 731,496 × 83
## src time date hour temperature_2m relativehumidity_2m
## <chr> <dttm> <date> <int> <dbl> <int>
## 1 NYC 2010-01-01 00:00:00 2010-01-01 0 -1.1 95
## 2 NYC 2010-01-01 01:00:00 2010-01-01 1 -1 96
## 3 NYC 2010-01-01 02:00:00 2010-01-01 2 -1 96
## 4 NYC 2010-01-01 03:00:00 2010-01-01 3 -0.8 97
## 5 NYC 2010-01-01 04:00:00 2010-01-01 4 -0.9 97
## 6 NYC 2010-01-01 05:00:00 2010-01-01 5 -0.8 97
## 7 NYC 2010-01-01 06:00:00 2010-01-01 6 -0.7 97
## 8 NYC 2010-01-01 07:00:00 2010-01-01 7 -0.5 97
## 9 NYC 2010-01-01 08:00:00 2010-01-01 8 -0.6 97
## 10 NYC 2010-01-01 09:00:00 2010-01-01 9 -0.6 97
## # ℹ 731,486 more rows
## # ℹ 77 more variables: dewpoint_2m <dbl>, apparent_temperature <dbl>,
## # pressure_msl <dbl>, surface_pressure <dbl>, precipitation <dbl>,
## # rain <dbl>, snowfall <dbl>, cloudcover <int>, cloudcover_low <int>,
## # cloudcover_mid <int>, cloudcover_high <int>, shortwave_radiation <dbl>,
## # direct_radiation <dbl>, direct_normal_irradiance <dbl>,
## # diffuse_radiation <dbl>, windspeed_10m <dbl>, windspeed_100m <dbl>, …
# Review counts by year: one row per city/split, one column per year,
# to confirm the random split is roughly 70/30 everywhere
allCity %>%
  count(year, src, tt) %>%
  pivot_wider(id_cols = c(src, tt), names_from = year, values_from = n)
## # A tibble: 12 × 16
## src tt `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018`
## <chr> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 Chicago test 2687 2616 2596 2605 2674 2626 2647 2638 2660
## 2 Chicago train 6073 6144 6188 6155 6086 6134 6137 6122 6100
## 3 Houston test 2608 2595 2611 2573 2702 2609 2602 2613 2597
## 4 Houston train 6152 6165 6173 6187 6058 6151 6182 6147 6163
## 5 LA test 2661 2519 2675 2671 2632 2655 2640 2589 2637
## 6 LA train 6099 6241 6109 6089 6128 6105 6144 6171 6123
## 7 Miami test 2622 2594 2637 2627 2554 2656 2630 2575 2594
## 8 Miami train 6138 6166 6147 6133 6206 6104 6154 6185 6166
## 9 NYC test 2629 2596 2639 2633 2653 2619 2669 2650 2624
## 10 NYC train 6131 6164 6145 6127 6107 6141 6115 6110 6136
## 11 Vegas test 2659 2549 2614 2695 2619 2655 2610 2601 2585
## 12 Vegas train 6101 6211 6170 6065 6141 6105 6174 6159 6175
## # ℹ 5 more variables: `2019` <int>, `2020` <int>, `2021` <int>, `2022` <int>,
## # `2023` <int>
Distributions of several key variables are explored:
# Variables chosen for the cross-city distribution comparison
keyVars <- c("temperature_2m",
             "relativehumidity_2m",
             "dewpoint_2m",
             "shortwave_radiation",
             "vapor_pressure_deficit",
             "soil_temperature_28_to_100cm",
             "soil_temperature_100_to_255cm",
             "soil_moisture_28_to_100cm",
             "soil_moisture_100_to_255cm"
)
# Long format: one row per (city, metric, value), then boxplots faceted
# by metric with free y scales since units differ across metrics
allCity %>%
  colSelector(vecSelect=c("src", keyVars)) %>%
  pivot_longer(cols = -src) %>%
  ggplot(aes(x = src, y = value)) +
  geom_boxplot(aes(fill = src)) +
  facet_wrap(~name, scales="free_y") +
  scale_fill_discrete(NULL) +
  labs(x=NULL, y=NULL, title="Distribution of Key Metrics by City")
At a glance, Miami seems similar to Houston on several metrics. The scatter of temperature and dewpoint is also explored:
# Round temperature and dewpoint to whole degrees and count duplicates,
# so the scatter can encode observation frequency as point size
allCity %>%
  select(t = temperature_2m, d = dewpoint_2m, src) %>%
  mutate(across(where(is.numeric), round)) %>%
  count(src, t, d) %>%
  ggplot(aes(x = t, y = d)) +
  geom_point(aes(size = n, color = src), alpha = 0.1) +
  # n-weighted fit so each rounded-degree cell counts by its frequency
  geom_smooth(aes(color = src, weight = n), method = "lm") +
  scale_color_discrete(NULL) +
  scale_size_continuous("# Obs") +
  labs(x="Temperature (C)", y="Dewpoint (C)", title="Temperature vs. Dewpoint", subtitle="Hourly")
## `geom_smooth()` using formula = 'y ~ x'
# Per-city Pearson correlation of temperature with dewpoint;
# arrange() restores the alphabetical ordering group_by would give
allCity %>%
  summarize(cor_td = cor(temperature_2m, dewpoint_2m), .by = src) %>%
  arrange(src)
## # A tibble: 6 × 2
## src cor_td
## <chr> <dbl>
## 1 Chicago 0.950
## 2 Houston 0.834
## 3 LA 0.273
## 4 Miami 0.792
## 5 NYC 0.919
## 6 Vegas 0.371
Miami exhibits strong correlation between temperature and dewpoint, most similar in magnitude to Houston's.
A basic rpart model is run excluding Miami, with predictions made on holdout data including Miami:
# Fit a classification tree on the training rows, holding Miami out
# entirely; `data = .` keeps the pipe from also inserting the data
# as the first (formula) argument
tstRP2 <- allCity %>%
  filter(tt == "train" & src != "Miami") %>%
  select(all_of(c("fct_src", varsTrain))) %>%
  rpart::rpart(fct_src ~ ., data = ., method = "class")
# Variable importances: unpack the named numeric vector into a
# two-column data frame (predictor name, importance score)
tstRP2$variable.importance %>%
  {data.frame(predictor = names(.), varImp = unname(.))}
## predictor varImp
## 1 soil_moisture_100_to_255cm 271772.8906
## 2 surface_pressure 229044.1367
## 3 soil_moisture_28_to_100cm 156413.4398
## 4 soil_moisture_7_to_28cm 154321.9803
## 5 soil_moisture_0_to_7cm 148439.1314
## 6 soil_temperature_100_to_255cm 63015.8130
## 7 relativehumidity_2m 37580.7621
## 8 soil_temperature_28_to_100cm 30614.2617
## 9 soil_temperature_7_to_28cm 21126.8896
## 10 windspeed_100m 17917.6043
## 11 soil_temperature_0_to_7cm 15009.6385
## 12 doy 14887.9829
## 13 apparent_temperature 13103.0382
## 14 temperature_2m 12479.4021
## 15 year 5524.8864
## 16 dewpoint_2m 955.1846
# Predictions: confusion counts as a tile heatmap, faceted by split.
# The text layer is added after the tiles so the counts draw on top.
allCity %>%
  mutate(pred = predict(tstRP2, newdata = ., type = "class")) %>%
  count(tt, src, pred) %>%
  ggplot(aes(x = src, y = pred)) +
  geom_tile(aes(fill = n)) +
  geom_text(aes(label = n), size = 2.5) +
  scale_fill_continuous(low="white", high="green") +
  facet_wrap(~tt)
# Accuracy on holdout: share of rows where the predicted city
# matches the true one, per split and city
allCity %>%
  mutate(pred = predict(tstRP2, newdata = ., type = "class")) %>%
  group_by(tt, src) %>%
  summarize(acc = mean(pred == src), .groups = "drop") %>%
  pivot_wider(id_cols = src, names_from = tt, values_from = acc)
## # A tibble: 6 × 3
## src test train
## <chr> <dbl> <dbl>
## 1 Chicago 0.995 0.996
## 2 Houston 0.983 0.983
## 3 LA 1.00 1.00
## 4 Miami 0 0
## 5 NYC 0.989 0.989
## 6 Vegas 1.00 1.00
A basic tree model works well to split the data by city, and it almost always predicts Miami as LA when trained without Miami data.